{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 2781, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.002157497303128371, "grad_norm": 2.5762534141540527, "learning_rate": 1.4285714285714284e-08, "loss": 0.9756889939308167, "step": 2 }, { "epoch": 0.004314994606256742, "grad_norm": 4.507562637329102, "learning_rate": 4.285714285714286e-08, "loss": 0.661911129951477, "step": 4 }, { "epoch": 0.006472491909385114, "grad_norm": 5.074595928192139, "learning_rate": 7.142857142857142e-08, "loss": 0.8359099626541138, "step": 6 }, { "epoch": 0.008629989212513484, "grad_norm": 1.9605220556259155, "learning_rate": 1e-07, "loss": 0.8231168985366821, "step": 8 }, { "epoch": 0.010787486515641856, "grad_norm": 1.6714932918548584, "learning_rate": 1.2857142857142855e-07, "loss": 0.7096143960952759, "step": 10 }, { "epoch": 0.012944983818770227, "grad_norm": 4.321066379547119, "learning_rate": 1.5714285714285714e-07, "loss": 0.7912081480026245, "step": 12 }, { "epoch": 0.015102481121898598, "grad_norm": 8.806991577148438, "learning_rate": 1.8571428571428572e-07, "loss": 0.856565535068512, "step": 14 }, { "epoch": 0.017259978425026967, "grad_norm": 4.99681282043457, "learning_rate": 2.1428571428571426e-07, "loss": 1.1687901020050049, "step": 16 }, { "epoch": 0.019417475728155338, "grad_norm": 2.5645041465759277, "learning_rate": 2.4285714285714287e-07, "loss": 1.0152899026870728, "step": 18 }, { "epoch": 0.021574973031283712, "grad_norm": 1.8295631408691406, "learning_rate": 2.714285714285714e-07, "loss": 0.6205134987831116, "step": 20 }, { "epoch": 0.023732470334412083, "grad_norm": 4.249131202697754, "learning_rate": 3e-07, "loss": 0.9060122966766357, "step": 22 }, { "epoch": 0.025889967637540454, "grad_norm": 3.811307191848755, "learning_rate": 3.2857142857142857e-07, "loss": 0.8020558953285217, "step": 24 }, { "epoch": 0.028047464940668825, "grad_norm": 3.780266284942627, "learning_rate": 3.5714285714285716e-07, "loss": 0.7707346677780151, "step": 26 }, { "epoch": 0.030204962243797196, "grad_norm": 2.9128313064575195, "learning_rate": 3.857142857142857e-07, "loss": 0.6942681670188904, "step": 28 }, { "epoch": 0.032362459546925564, "grad_norm": 3.999141216278076, "learning_rate": 4.142857142857143e-07, "loss": 1.0023633241653442, "step": 30 }, { "epoch": 0.034519956850053934, "grad_norm": 1.0977139472961426, "learning_rate": 4.428571428571428e-07, "loss": 0.6870981454849243, "step": 32 }, { "epoch": 0.036677454153182305, "grad_norm": 7.128520965576172, "learning_rate": 4.714285714285714e-07, "loss": 0.9697052836418152, "step": 34 }, { "epoch": 0.038834951456310676, "grad_norm": 2.706787109375, "learning_rate": 5e-07, "loss": 0.8704000115394592, "step": 36 }, { "epoch": 0.040992448759439054, "grad_norm": 28.9028377532959, "learning_rate": 5.285714285714286e-07, "loss": 1.3768728971481323, "step": 38 }, { "epoch": 0.043149946062567425, "grad_norm": 5.019951343536377, "learning_rate": 5.571428571428571e-07, "loss": 0.5050771832466125, "step": 40 }, { "epoch": 0.045307443365695796, "grad_norm": 1.896100401878357, "learning_rate": 5.857142857142857e-07, "loss": 0.7155470848083496, "step": 42 }, { "epoch": 0.04746494066882417, "grad_norm": 1.3924132585525513, "learning_rate": 6.142857142857143e-07, "loss": 0.8246269822120667, "step": 44 }, { "epoch": 0.04962243797195254, "grad_norm": 2.244379758834839, "learning_rate": 6.428571428571429e-07, "loss": 0.6300725340843201, "step": 46 }, { "epoch": 0.05177993527508091, "grad_norm": 4.4260573387146, "learning_rate": 6.714285714285714e-07, "loss": 0.7466659545898438, "step": 48 }, { "epoch": 0.05393743257820928, "grad_norm": 3.2849934101104736, "learning_rate": 7e-07, "loss": 0.5939810276031494, "step": 50 }, { "epoch": 0.05609492988133765, "grad_norm": 1.4281387329101562, "learning_rate": 7.285714285714286e-07, "loss": 0.36054807901382446, "step": 52 }, { "epoch": 0.05825242718446602, "grad_norm": 6.341257095336914, "learning_rate": 7.57142857142857e-07, "loss": 0.8747532367706299, "step": 54 }, { "epoch": 0.06040992448759439, "grad_norm": 14.657182693481445, "learning_rate": 7.857142857142856e-07, "loss": 0.991641640663147, "step": 56 }, { "epoch": 0.06256742179072276, "grad_norm": 1.8714779615402222, "learning_rate": 8.142857142857142e-07, "loss": 0.6384595632553101, "step": 58 }, { "epoch": 0.06472491909385113, "grad_norm": 6.577902793884277, "learning_rate": 8.428571428571428e-07, "loss": 0.7958289384841919, "step": 60 }, { "epoch": 0.0668824163969795, "grad_norm": 6.575786590576172, "learning_rate": 8.714285714285714e-07, "loss": 0.9528671503067017, "step": 62 }, { "epoch": 0.06903991370010787, "grad_norm": 2.090458631515503, "learning_rate": 9e-07, "loss": 0.7380706071853638, "step": 64 }, { "epoch": 0.07119741100323625, "grad_norm": 2.861210584640503, "learning_rate": 9.285714285714285e-07, "loss": 0.8518832921981812, "step": 66 }, { "epoch": 0.07335490830636461, "grad_norm": 2.01114559173584, "learning_rate": 9.571428571428572e-07, "loss": 0.7344475388526917, "step": 68 }, { "epoch": 0.07551240560949299, "grad_norm": 9.70598316192627, "learning_rate": 9.857142857142857e-07, "loss": 0.6952767968177795, "step": 70 }, { "epoch": 0.07766990291262135, "grad_norm": 3.434774160385132, "learning_rate": 1.0142857142857142e-06, "loss": 0.6583250761032104, "step": 72 }, { "epoch": 0.07982740021574973, "grad_norm": 2.2592010498046875, "learning_rate": 1.0428571428571429e-06, "loss": 0.6166390180587769, "step": 74 }, { "epoch": 0.08198489751887811, "grad_norm": 2.191253185272217, "learning_rate": 1.0714285714285714e-06, "loss": 0.6770592927932739, "step": 76 }, { "epoch": 0.08414239482200647, "grad_norm": 1.9236204624176025, "learning_rate": 1.1e-06, "loss": 0.6031355261802673, "step": 78 }, { "epoch": 0.08629989212513485, "grad_norm": 4.434933662414551, "learning_rate": 1.1285714285714285e-06, "loss": 0.949522078037262, "step": 80 }, { "epoch": 0.08845738942826321, "grad_norm": 3.6469240188598633, "learning_rate": 1.1571428571428572e-06, "loss": 0.5610405206680298, "step": 82 }, { "epoch": 0.09061488673139159, "grad_norm": 2.5983078479766846, "learning_rate": 1.1857142857142857e-06, "loss": 0.40821573138237, "step": 84 }, { "epoch": 0.09277238403451996, "grad_norm": 1.9814131259918213, "learning_rate": 1.2142857142857142e-06, "loss": 0.6474723815917969, "step": 86 }, { "epoch": 0.09492988133764833, "grad_norm": 1.5465178489685059, "learning_rate": 1.2428571428571429e-06, "loss": 0.6871901154518127, "step": 88 }, { "epoch": 0.0970873786407767, "grad_norm": 2.41676664352417, "learning_rate": 1.2714285714285714e-06, "loss": 0.6116282343864441, "step": 90 }, { "epoch": 0.09924487594390508, "grad_norm": 1.3548113107681274, "learning_rate": 1.3e-06, "loss": 0.6866545677185059, "step": 92 }, { "epoch": 0.10140237324703344, "grad_norm": 3.994168519973755, "learning_rate": 1.3285714285714285e-06, "loss": 0.7566230297088623, "step": 94 }, { "epoch": 0.10355987055016182, "grad_norm": 0.6952499151229858, "learning_rate": 1.3571428571428572e-06, "loss": 0.5508694648742676, "step": 96 }, { "epoch": 0.10571736785329018, "grad_norm": 2.8167052268981934, "learning_rate": 1.3857142857142857e-06, "loss": 0.6589823961257935, "step": 98 }, { "epoch": 0.10787486515641856, "grad_norm": 20.95288848876953, "learning_rate": 1.4142857142857144e-06, "loss": 0.558512806892395, "step": 100 }, { "epoch": 0.11003236245954692, "grad_norm": 2.7887301445007324, "learning_rate": 1.4428571428571429e-06, "loss": 0.7576460838317871, "step": 102 }, { "epoch": 0.1121898597626753, "grad_norm": 3.9458041191101074, "learning_rate": 1.4714285714285716e-06, "loss": 0.7658395171165466, "step": 104 }, { "epoch": 0.11434735706580366, "grad_norm": 3.11449933052063, "learning_rate": 1.5e-06, "loss": 0.40781867504119873, "step": 106 }, { "epoch": 0.11650485436893204, "grad_norm": 55.18882369995117, "learning_rate": 1.5285714285714283e-06, "loss": 0.7694418430328369, "step": 108 }, { "epoch": 0.1186623516720604, "grad_norm": 2.1952178478240967, "learning_rate": 1.557142857142857e-06, "loss": 0.6382092833518982, "step": 110 }, { "epoch": 0.12081984897518878, "grad_norm": 4.497931957244873, "learning_rate": 1.5857142857142855e-06, "loss": 0.5454550981521606, "step": 112 }, { "epoch": 0.12297734627831715, "grad_norm": 3.0473287105560303, "learning_rate": 1.6142857142857142e-06, "loss": 0.5170645713806152, "step": 114 }, { "epoch": 0.12513484358144553, "grad_norm": 3.3767971992492676, "learning_rate": 1.6428571428571426e-06, "loss": 0.5989764332771301, "step": 116 }, { "epoch": 0.1272923408845739, "grad_norm": 2.4410431385040283, "learning_rate": 1.6714285714285713e-06, "loss": 0.538360595703125, "step": 118 }, { "epoch": 0.12944983818770225, "grad_norm": 2.4122188091278076, "learning_rate": 1.6999999999999998e-06, "loss": 0.5263152122497559, "step": 120 }, { "epoch": 0.13160733549083065, "grad_norm": 1.3902043104171753, "learning_rate": 1.7285714285714285e-06, "loss": 0.6690125465393066, "step": 122 }, { "epoch": 0.133764832793959, "grad_norm": 1.8167104721069336, "learning_rate": 1.757142857142857e-06, "loss": 0.6804316639900208, "step": 124 }, { "epoch": 0.13592233009708737, "grad_norm": 0.8370219469070435, "learning_rate": 1.7857142857142857e-06, "loss": 0.8587678670883179, "step": 126 }, { "epoch": 0.13807982740021574, "grad_norm": 1.6989076137542725, "learning_rate": 1.8142857142857142e-06, "loss": 0.5992355942726135, "step": 128 }, { "epoch": 0.14023732470334413, "grad_norm": 2.9481143951416016, "learning_rate": 1.8428571428571426e-06, "loss": 0.35928595066070557, "step": 130 }, { "epoch": 0.1423948220064725, "grad_norm": 4.938568592071533, "learning_rate": 1.8714285714285713e-06, "loss": 0.6318232417106628, "step": 132 }, { "epoch": 0.14455231930960086, "grad_norm": 6.683016777038574, "learning_rate": 1.8999999999999998e-06, "loss": 0.7641289234161377, "step": 134 }, { "epoch": 0.14670981661272922, "grad_norm": 3.555145740509033, "learning_rate": 1.9285714285714285e-06, "loss": 0.7520711421966553, "step": 136 }, { "epoch": 0.1488673139158576, "grad_norm": 2.5793240070343018, "learning_rate": 1.957142857142857e-06, "loss": 0.7046728730201721, "step": 138 }, { "epoch": 0.15102481121898598, "grad_norm": 4.05105447769165, "learning_rate": 1.985714285714286e-06, "loss": 0.4872206449508667, "step": 140 }, { "epoch": 0.15318230852211434, "grad_norm": 3.6239945888519287, "learning_rate": 1.9999993632405402e-06, "loss": 0.6896734237670898, "step": 142 }, { "epoch": 0.1553398058252427, "grad_norm": 1.404502272605896, "learning_rate": 1.999994269170269e-06, "loss": 0.5899009108543396, "step": 144 }, { "epoch": 0.1574973031283711, "grad_norm": 4.971773147583008, "learning_rate": 1.9999840810585597e-06, "loss": 0.7049793601036072, "step": 146 }, { "epoch": 0.15965480043149946, "grad_norm": 1.3405131101608276, "learning_rate": 1.9999687989630773e-06, "loss": 0.7568652033805847, "step": 148 }, { "epoch": 0.16181229773462782, "grad_norm": 1.5060237646102905, "learning_rate": 1.9999484229703205e-06, "loss": 0.5981850624084473, "step": 150 }, { "epoch": 0.16396979503775622, "grad_norm": 5.329726696014404, "learning_rate": 1.9999229531956187e-06, "loss": 0.4102513790130615, "step": 152 }, { "epoch": 0.16612729234088458, "grad_norm": 6.227677822113037, "learning_rate": 1.9998923897831327e-06, "loss": 0.6681348085403442, "step": 154 }, { "epoch": 0.16828478964401294, "grad_norm": 12.442171096801758, "learning_rate": 1.9998567329058537e-06, "loss": 0.7901923060417175, "step": 156 }, { "epoch": 0.1704422869471413, "grad_norm": 1.463600754737854, "learning_rate": 1.9998159827656035e-06, "loss": 0.6713565587997437, "step": 158 }, { "epoch": 0.1725997842502697, "grad_norm": 4.213168621063232, "learning_rate": 1.9997701395930303e-06, "loss": 0.8061548471450806, "step": 160 }, { "epoch": 0.17475728155339806, "grad_norm": 1.8637281656265259, "learning_rate": 1.9997192036476113e-06, "loss": 0.6461450457572937, "step": 162 }, { "epoch": 0.17691477885652643, "grad_norm": 1.6268417835235596, "learning_rate": 1.999663175217647e-06, "loss": 0.5500176548957825, "step": 164 }, { "epoch": 0.1790722761596548, "grad_norm": 1.7090154886245728, "learning_rate": 1.999602054620263e-06, "loss": 0.6174845695495605, "step": 166 }, { "epoch": 0.18122977346278318, "grad_norm": 1.6175590753555298, "learning_rate": 1.9995358422014078e-06, "loss": 0.5844609141349792, "step": 168 }, { "epoch": 0.18338727076591155, "grad_norm": 1.7991397380828857, "learning_rate": 1.9994645383358485e-06, "loss": 0.6766707897186279, "step": 170 }, { "epoch": 0.1855447680690399, "grad_norm": 2.1886162757873535, "learning_rate": 1.9993881434271707e-06, "loss": 0.7125424146652222, "step": 172 }, { "epoch": 0.18770226537216828, "grad_norm": 2.0554721355438232, "learning_rate": 1.9993066579077766e-06, "loss": 0.6898304224014282, "step": 174 }, { "epoch": 0.18985976267529667, "grad_norm": 1.493452548980713, "learning_rate": 1.9992200822388794e-06, "loss": 0.6477411985397339, "step": 176 }, { "epoch": 0.19201725997842503, "grad_norm": 5.064616680145264, "learning_rate": 1.999128416910507e-06, "loss": 0.5987610816955566, "step": 178 }, { "epoch": 0.1941747572815534, "grad_norm": 1.9240336418151855, "learning_rate": 1.9990316624414902e-06, "loss": 0.5100513100624084, "step": 180 }, { "epoch": 0.19633225458468176, "grad_norm": 2.9364237785339355, "learning_rate": 1.998929819379468e-06, "loss": 0.8424034714698792, "step": 182 }, { "epoch": 0.19848975188781015, "grad_norm": 1.8846420049667358, "learning_rate": 1.998822888300881e-06, "loss": 0.6540043354034424, "step": 184 }, { "epoch": 0.20064724919093851, "grad_norm": 25.783281326293945, "learning_rate": 1.9987108698109675e-06, "loss": 0.7192497253417969, "step": 186 }, { "epoch": 0.20280474649406688, "grad_norm": 3.445650100708008, "learning_rate": 1.9985937645437617e-06, "loss": 0.4845433831214905, "step": 188 }, { "epoch": 0.20496224379719524, "grad_norm": 2.80410099029541, "learning_rate": 1.9984715731620883e-06, "loss": 0.42129552364349365, "step": 190 }, { "epoch": 0.20711974110032363, "grad_norm": 4.098501682281494, "learning_rate": 1.9983442963575616e-06, "loss": 0.5982234477996826, "step": 192 }, { "epoch": 0.209277238403452, "grad_norm": 3.001051664352417, "learning_rate": 1.998211934850578e-06, "loss": 0.65160071849823, "step": 194 }, { "epoch": 0.21143473570658036, "grad_norm": 3.8019604682922363, "learning_rate": 1.998074489390314e-06, "loss": 0.5729217529296875, "step": 196 }, { "epoch": 0.21359223300970873, "grad_norm": 7.936295986175537, "learning_rate": 1.997931960754724e-06, "loss": 0.6380269527435303, "step": 198 }, { "epoch": 0.21574973031283712, "grad_norm": 1.6919151544570923, "learning_rate": 1.99778434975053e-06, "loss": 0.692238450050354, "step": 200 }, { "epoch": 0.21790722761596548, "grad_norm": 2.152122974395752, "learning_rate": 1.997631657213223e-06, "loss": 0.5761340856552124, "step": 202 }, { "epoch": 0.22006472491909385, "grad_norm": 1.63760244846344, "learning_rate": 1.9974738840070554e-06, "loss": 0.6452651619911194, "step": 204 }, { "epoch": 0.2222222222222222, "grad_norm": 2.098172187805176, "learning_rate": 1.9973110310250364e-06, "loss": 0.6504206657409668, "step": 206 }, { "epoch": 0.2243797195253506, "grad_norm": 5.233450889587402, "learning_rate": 1.9971430991889274e-06, "loss": 0.603036642074585, "step": 208 }, { "epoch": 0.22653721682847897, "grad_norm": 5.004756927490234, "learning_rate": 1.996970089449236e-06, "loss": 0.6306214332580566, "step": 210 }, { "epoch": 0.22869471413160733, "grad_norm": 2.422592878341675, "learning_rate": 1.9967920027852115e-06, "loss": 0.5596987009048462, "step": 212 }, { "epoch": 0.2308522114347357, "grad_norm": 1.5725358724594116, "learning_rate": 1.99660884020484e-06, "loss": 0.5717631578445435, "step": 214 }, { "epoch": 0.23300970873786409, "grad_norm": 1.93349289894104, "learning_rate": 1.9964206027448355e-06, "loss": 0.5819022059440613, "step": 216 }, { "epoch": 0.23516720604099245, "grad_norm": 4.832953453063965, "learning_rate": 1.9962272914706387e-06, "loss": 0.6450964212417603, "step": 218 }, { "epoch": 0.2373247033441208, "grad_norm": 3.763730525970459, "learning_rate": 1.996028907476406e-06, "loss": 0.7145527601242065, "step": 220 }, { "epoch": 0.23948220064724918, "grad_norm": 3.421351909637451, "learning_rate": 1.995825451885008e-06, "loss": 0.7275266051292419, "step": 222 }, { "epoch": 0.24163969795037757, "grad_norm": 1.2539039850234985, "learning_rate": 1.995616925848019e-06, "loss": 0.6702066659927368, "step": 224 }, { "epoch": 0.24379719525350593, "grad_norm": 3.2891104221343994, "learning_rate": 1.9954033305457154e-06, "loss": 0.6769608855247498, "step": 226 }, { "epoch": 0.2459546925566343, "grad_norm": 3.350397825241089, "learning_rate": 1.995184667187062e-06, "loss": 0.5695778727531433, "step": 228 }, { "epoch": 0.2481121898597627, "grad_norm": 1.5770859718322754, "learning_rate": 1.994960937009713e-06, "loss": 0.690039873123169, "step": 230 }, { "epoch": 0.25026968716289105, "grad_norm": 12.937152862548828, "learning_rate": 1.9947321412799988e-06, "loss": 0.883323073387146, "step": 232 }, { "epoch": 0.2524271844660194, "grad_norm": 3.2988932132720947, "learning_rate": 1.994498281292922e-06, "loss": 0.798008918762207, "step": 234 }, { "epoch": 0.2545846817691478, "grad_norm": 8.973938941955566, "learning_rate": 1.9942593583721493e-06, "loss": 0.6434545516967773, "step": 236 }, { "epoch": 0.25674217907227614, "grad_norm": 1.573055624961853, "learning_rate": 1.9940153738700045e-06, "loss": 0.6816240549087524, "step": 238 }, { "epoch": 0.2588996763754045, "grad_norm": 1.305035948753357, "learning_rate": 1.9937663291674593e-06, "loss": 0.7506214380264282, "step": 240 }, { "epoch": 0.26105717367853293, "grad_norm": 1.362859845161438, "learning_rate": 1.993512225674127e-06, "loss": 0.6811486482620239, "step": 242 }, { "epoch": 0.2632146709816613, "grad_norm": 1.6364890336990356, "learning_rate": 1.9932530648282555e-06, "loss": 0.648339569568634, "step": 244 }, { "epoch": 0.26537216828478966, "grad_norm": 1.4568816423416138, "learning_rate": 1.992988848096715e-06, "loss": 0.6864685416221619, "step": 246 }, { "epoch": 0.267529665587918, "grad_norm": 37.19289779663086, "learning_rate": 1.9927195769749953e-06, "loss": 0.8183077573776245, "step": 248 }, { "epoch": 0.2696871628910464, "grad_norm": 2.8543128967285156, "learning_rate": 1.9924452529871915e-06, "loss": 0.6495329141616821, "step": 250 }, { "epoch": 0.27184466019417475, "grad_norm": 2.8163821697235107, "learning_rate": 1.992165877686001e-06, "loss": 0.7900782227516174, "step": 252 }, { "epoch": 0.2740021574973031, "grad_norm": 1.5852282047271729, "learning_rate": 1.9918814526527105e-06, "loss": 0.47972819209098816, "step": 254 }, { "epoch": 0.2761596548004315, "grad_norm": 1.4288207292556763, "learning_rate": 1.9915919794971892e-06, "loss": 0.5876221656799316, "step": 256 }, { "epoch": 0.2783171521035599, "grad_norm": 3.196465015411377, "learning_rate": 1.9912974598578793e-06, "loss": 0.46865469217300415, "step": 258 }, { "epoch": 0.28047464940668826, "grad_norm": 1.5016988515853882, "learning_rate": 1.9909978954017847e-06, "loss": 0.7941604852676392, "step": 260 }, { "epoch": 0.2826321467098166, "grad_norm": 2.889617919921875, "learning_rate": 1.9906932878244665e-06, "loss": 0.5378029942512512, "step": 262 }, { "epoch": 0.284789644012945, "grad_norm": 1.2625280618667603, "learning_rate": 1.990383638850028e-06, "loss": 0.7057135701179504, "step": 264 }, { "epoch": 0.28694714131607335, "grad_norm": 2.495908260345459, "learning_rate": 1.990068950231107e-06, "loss": 0.6187635660171509, "step": 266 }, { "epoch": 0.2891046386192017, "grad_norm": 3.3619916439056396, "learning_rate": 1.9897492237488683e-06, "loss": 0.7007441520690918, "step": 268 }, { "epoch": 0.2912621359223301, "grad_norm": 2.7590885162353516, "learning_rate": 1.9894244612129886e-06, "loss": 0.5531818270683289, "step": 270 }, { "epoch": 0.29341963322545844, "grad_norm": 13.184637069702148, "learning_rate": 1.9890946644616523e-06, "loss": 0.7034265398979187, "step": 272 }, { "epoch": 0.29557713052858686, "grad_norm": 3.412360191345215, "learning_rate": 1.9887598353615344e-06, "loss": 0.7009316086769104, "step": 274 }, { "epoch": 0.2977346278317152, "grad_norm": 4.455333232879639, "learning_rate": 1.988419975807796e-06, "loss": 0.662095844745636, "step": 276 }, { "epoch": 0.2998921251348436, "grad_norm": 4.525757789611816, "learning_rate": 1.988075087724069e-06, "loss": 0.6825252771377563, "step": 278 }, { "epoch": 0.30204962243797195, "grad_norm": 2.143056869506836, "learning_rate": 1.98772517306245e-06, "loss": 0.674209713935852, "step": 280 }, { "epoch": 0.3042071197411003, "grad_norm": 8.63754940032959, "learning_rate": 1.9873702338034837e-06, "loss": 0.6299592852592468, "step": 282 }, { "epoch": 0.3063646170442287, "grad_norm": 3.6352531909942627, "learning_rate": 1.9870102719561552e-06, "loss": 0.6460418701171875, "step": 284 }, { "epoch": 0.30852211434735705, "grad_norm": 6.43894624710083, "learning_rate": 1.9866452895578784e-06, "loss": 0.4357595145702362, "step": 286 }, { "epoch": 0.3106796116504854, "grad_norm": 4.515871047973633, "learning_rate": 1.986275288674484e-06, "loss": 0.6767151355743408, "step": 288 }, { "epoch": 0.31283710895361383, "grad_norm": 1.191287636756897, "learning_rate": 1.9859002714002067e-06, "loss": 0.6962684392929077, "step": 290 }, { "epoch": 0.3149946062567422, "grad_norm": 6.864166259765625, "learning_rate": 1.9855202398576756e-06, "loss": 0.6553777456283569, "step": 292 }, { "epoch": 0.31715210355987056, "grad_norm": 2.185790777206421, "learning_rate": 1.9851351961979e-06, "loss": 0.7482725977897644, "step": 294 }, { "epoch": 0.3193096008629989, "grad_norm": 3.45621395111084, "learning_rate": 1.9847451426002587e-06, "loss": 0.4616151452064514, "step": 296 }, { "epoch": 0.3214670981661273, "grad_norm": 6.849677085876465, "learning_rate": 1.9843500812724876e-06, "loss": 0.6612831354141235, "step": 298 }, { "epoch": 0.32362459546925565, "grad_norm": 5.291024208068848, "learning_rate": 1.9839500144506657e-06, "loss": 0.6871935129165649, "step": 300 }, { "epoch": 0.325782092772384, "grad_norm": 4.101494789123535, "learning_rate": 1.9835449443992042e-06, "loss": 0.4521007537841797, "step": 302 }, { "epoch": 0.32793959007551243, "grad_norm": 2.2320597171783447, "learning_rate": 1.9831348734108325e-06, "loss": 0.6099227666854858, "step": 304 }, { "epoch": 0.3300970873786408, "grad_norm": 2.0806515216827393, "learning_rate": 1.9827198038065867e-06, "loss": 0.6959011554718018, "step": 306 }, { "epoch": 0.33225458468176916, "grad_norm": 2.5396556854248047, "learning_rate": 1.9822997379357946e-06, "loss": 0.6063118577003479, "step": 308 }, { "epoch": 0.3344120819848975, "grad_norm": 1.9925243854522705, "learning_rate": 1.9818746781760637e-06, "loss": 0.7447793483734131, "step": 310 }, { "epoch": 0.3365695792880259, "grad_norm": 1.5124423503875732, "learning_rate": 1.9814446269332665e-06, "loss": 0.6756496429443359, "step": 312 }, { "epoch": 0.33872707659115425, "grad_norm": 1.5381604433059692, "learning_rate": 1.9810095866415288e-06, "loss": 0.7244548797607422, "step": 314 }, { "epoch": 0.3408845738942826, "grad_norm": 2.3492956161499023, "learning_rate": 1.980569559763214e-06, "loss": 0.7345068454742432, "step": 316 }, { "epoch": 0.343042071197411, "grad_norm": 3.104962110519409, "learning_rate": 1.980124548788911e-06, "loss": 0.7250902056694031, "step": 318 }, { "epoch": 0.3451995685005394, "grad_norm": 5.452524662017822, "learning_rate": 1.9796745562374177e-06, "loss": 0.5213475823402405, "step": 320 }, { "epoch": 0.34735706580366776, "grad_norm": 1.2911655902862549, "learning_rate": 1.9792195846557292e-06, "loss": 0.41105973720550537, "step": 322 }, { "epoch": 0.34951456310679613, "grad_norm": 1.4482433795928955, "learning_rate": 1.9787596366190224e-06, "loss": 0.6460384130477905, "step": 324 }, { "epoch": 0.3516720604099245, "grad_norm": 7.015091419219971, "learning_rate": 1.9782947147306403e-06, "loss": 0.7474948763847351, "step": 326 }, { "epoch": 0.35382955771305286, "grad_norm": 3.078240156173706, "learning_rate": 1.9778248216220793e-06, "loss": 0.6818826198577881, "step": 328 }, { "epoch": 0.3559870550161812, "grad_norm": 1.273003339767456, "learning_rate": 1.977349959952973e-06, "loss": 0.6558285355567932, "step": 330 }, { "epoch": 0.3581445523193096, "grad_norm": 5.311271667480469, "learning_rate": 1.976870132411077e-06, "loss": 0.48177772760391235, "step": 332 }, { "epoch": 0.36030204962243795, "grad_norm": 4.013199329376221, "learning_rate": 1.976385341712255e-06, "loss": 0.6249281167984009, "step": 334 }, { "epoch": 0.36245954692556637, "grad_norm": 10.565508842468262, "learning_rate": 1.9758955906004624e-06, "loss": 0.5407902002334595, "step": 336 }, { "epoch": 0.36461704422869473, "grad_norm": 3.089301824569702, "learning_rate": 1.97540088184773e-06, "loss": 0.5577709674835205, "step": 338 }, { "epoch": 0.3667745415318231, "grad_norm": 9.143464088439941, "learning_rate": 1.97490121825415e-06, "loss": 0.7211488485336304, "step": 340 }, { "epoch": 0.36893203883495146, "grad_norm": 1.91471529006958, "learning_rate": 1.97439660264786e-06, "loss": 0.6752923727035522, "step": 342 }, { "epoch": 0.3710895361380798, "grad_norm": 1.919268250465393, "learning_rate": 1.9738870378850255e-06, "loss": 0.6122534871101379, "step": 344 }, { "epoch": 0.3732470334412082, "grad_norm": 1.600658893585205, "learning_rate": 1.973372526849825e-06, "loss": 0.6465229988098145, "step": 346 }, { "epoch": 0.37540453074433655, "grad_norm": 2.6975924968719482, "learning_rate": 1.9728530724544317e-06, "loss": 0.7250155806541443, "step": 348 }, { "epoch": 0.3775620280474649, "grad_norm": 1.543426752090454, "learning_rate": 1.972328677639003e-06, "loss": 0.6498576998710632, "step": 350 }, { "epoch": 0.37971952535059333, "grad_norm": 2.0587544441223145, "learning_rate": 1.971799345371654e-06, "loss": 0.6255270838737488, "step": 352 }, { "epoch": 0.3818770226537217, "grad_norm": 4.156800746917725, "learning_rate": 1.97126507864845e-06, "loss": 0.7264662981033325, "step": 354 }, { "epoch": 0.38403451995685006, "grad_norm": 1.7672313451766968, "learning_rate": 1.9707258804933843e-06, "loss": 0.6123859882354736, "step": 356 }, { "epoch": 0.3861920172599784, "grad_norm": 2.0073418617248535, "learning_rate": 1.9701817539583623e-06, "loss": 0.584026038646698, "step": 358 }, { "epoch": 0.3883495145631068, "grad_norm": 1.1840739250183105, "learning_rate": 1.9696327021231857e-06, "loss": 0.7097981572151184, "step": 360 }, { "epoch": 0.39050701186623515, "grad_norm": 1.7339706420898438, "learning_rate": 1.9690787280955324e-06, "loss": 0.6338366866111755, "step": 362 }, { "epoch": 0.3926645091693635, "grad_norm": 2.4935598373413086, "learning_rate": 1.9685198350109406e-06, "loss": 0.5935678482055664, "step": 364 }, { "epoch": 0.3948220064724919, "grad_norm": 6.941248893737793, "learning_rate": 1.9679560260327916e-06, "loss": 0.7488420009613037, "step": 366 }, { "epoch": 0.3969795037756203, "grad_norm": 5.579442024230957, "learning_rate": 1.9673873043522904e-06, "loss": 0.6892845630645752, "step": 368 }, { "epoch": 0.39913700107874867, "grad_norm": 1.7267175912857056, "learning_rate": 1.9668136731884486e-06, "loss": 0.7125424742698669, "step": 370 }, { "epoch": 0.40129449838187703, "grad_norm": 1.7778825759887695, "learning_rate": 1.966235135788065e-06, "loss": 0.6329432129859924, "step": 372 }, { "epoch": 0.4034519956850054, "grad_norm": 4.815840244293213, "learning_rate": 1.965651695425709e-06, "loss": 0.6711968183517456, "step": 374 }, { "epoch": 0.40560949298813376, "grad_norm": 1.8194682598114014, "learning_rate": 1.965063355403701e-06, "loss": 0.5624091625213623, "step": 376 }, { "epoch": 0.4077669902912621, "grad_norm": 3.847508192062378, "learning_rate": 1.9644701190520943e-06, "loss": 0.43470942974090576, "step": 378 }, { "epoch": 0.4099244875943905, "grad_norm": 1.7189173698425293, "learning_rate": 1.9638719897286545e-06, "loss": 0.5556265115737915, "step": 380 }, { "epoch": 0.4120819848975189, "grad_norm": 2.6449780464172363, "learning_rate": 1.9632689708188435e-06, "loss": 0.5694633722305298, "step": 382 }, { "epoch": 0.41423948220064727, "grad_norm": 6.1240410804748535, "learning_rate": 1.962661065735797e-06, "loss": 0.6872696280479431, "step": 384 }, { "epoch": 0.41639697950377563, "grad_norm": 5.022050380706787, "learning_rate": 1.9620482779203086e-06, "loss": 0.6486364603042603, "step": 386 }, { "epoch": 0.418554476806904, "grad_norm": 1.9404337406158447, "learning_rate": 1.961430610840807e-06, "loss": 0.6287031173706055, "step": 388 }, { "epoch": 0.42071197411003236, "grad_norm": 4.457851886749268, "learning_rate": 1.9608080679933385e-06, "loss": 0.40318727493286133, "step": 390 }, { "epoch": 0.4228694714131607, "grad_norm": 8.74242115020752, "learning_rate": 1.960180652901547e-06, "loss": 0.6349734663963318, "step": 392 }, { "epoch": 0.4250269687162891, "grad_norm": 4.8024725914001465, "learning_rate": 1.9595483691166534e-06, "loss": 0.6840596199035645, "step": 394 }, { "epoch": 0.42718446601941745, "grad_norm": 7.498271465301514, "learning_rate": 1.958911220217436e-06, "loss": 0.817265510559082, "step": 396 }, { "epoch": 0.42934196332254587, "grad_norm": 2.826599359512329, "learning_rate": 1.958269209810209e-06, "loss": 0.5891008377075195, "step": 398 }, { "epoch": 0.43149946062567424, "grad_norm": 4.398319244384766, "learning_rate": 1.957622341528805e-06, "loss": 0.5453633069992065, "step": 400 }, { "epoch": 0.4336569579288026, "grad_norm": 2.75591778755188, "learning_rate": 1.9569706190345512e-06, "loss": 0.6217541098594666, "step": 402 }, { "epoch": 0.43581445523193096, "grad_norm": 2.1976888179779053, "learning_rate": 1.9563140460162505e-06, "loss": 0.658210813999176, "step": 404 }, { "epoch": 0.43797195253505933, "grad_norm": 4.405237197875977, "learning_rate": 1.9556526261901602e-06, "loss": 0.735411524772644, "step": 406 }, { "epoch": 0.4401294498381877, "grad_norm": 3.2632224559783936, "learning_rate": 1.95498636329997e-06, "loss": 0.5909388065338135, "step": 408 }, { "epoch": 0.44228694714131606, "grad_norm": 2.1249871253967285, "learning_rate": 1.9543152611167837e-06, "loss": 0.7845476865768433, "step": 410 }, { "epoch": 0.4444444444444444, "grad_norm": 4.967487335205078, "learning_rate": 1.9536393234390937e-06, "loss": 0.6481199860572815, "step": 412 }, { "epoch": 0.44660194174757284, "grad_norm": 1.769586443901062, "learning_rate": 1.9529585540927636e-06, "loss": 0.5764113068580627, "step": 414 }, { "epoch": 0.4487594390507012, "grad_norm": 4.130702972412109, "learning_rate": 1.9522729569310036e-06, "loss": 0.7091037034988403, "step": 416 }, { "epoch": 0.45091693635382957, "grad_norm": 2.021075487136841, "learning_rate": 1.9515825358343494e-06, "loss": 0.6121603846549988, "step": 418 }, { "epoch": 0.45307443365695793, "grad_norm": 2.6177845001220703, "learning_rate": 1.9508872947106413e-06, "loss": 0.6298436522483826, "step": 420 }, { "epoch": 0.4552319309600863, "grad_norm": 2.469846487045288, "learning_rate": 1.9501872374950016e-06, "loss": 0.6969653367996216, "step": 422 }, { "epoch": 0.45738942826321466, "grad_norm": 1.5605947971343994, "learning_rate": 1.949482368149811e-06, "loss": 0.6391591429710388, "step": 424 }, { "epoch": 0.459546925566343, "grad_norm": 7.972052097320557, "learning_rate": 1.948772690664688e-06, "loss": 0.6320364475250244, "step": 426 }, { "epoch": 0.4617044228694714, "grad_norm": 3.05439829826355, "learning_rate": 1.9480582090564657e-06, "loss": 0.7582883238792419, "step": 428 }, { "epoch": 0.4638619201725998, "grad_norm": 1.7435839176177979, "learning_rate": 1.9473389273691686e-06, "loss": 0.653886616230011, "step": 430 }, { "epoch": 0.46601941747572817, "grad_norm": 1.643883466720581, "learning_rate": 1.9466148496739893e-06, "loss": 0.6401156187057495, "step": 432 }, { "epoch": 0.46817691477885653, "grad_norm": 1.892043113708496, "learning_rate": 1.9458859800692685e-06, "loss": 0.42768222093582153, "step": 434 }, { "epoch": 0.4703344120819849, "grad_norm": 7.755466938018799, "learning_rate": 1.9451523226804665e-06, "loss": 0.7829925417900085, "step": 436 }, { "epoch": 0.47249190938511326, "grad_norm": 25.02216339111328, "learning_rate": 1.944413881660145e-06, "loss": 0.8609887361526489, "step": 438 }, { "epoch": 0.4746494066882416, "grad_norm": 1.8407223224639893, "learning_rate": 1.9436706611879413e-06, "loss": 0.5991024374961853, "step": 440 }, { "epoch": 0.47680690399137, "grad_norm": 1.7243049144744873, "learning_rate": 1.9429226654705433e-06, "loss": 0.5861119627952576, "step": 442 }, { "epoch": 0.47896440129449835, "grad_norm": 3.1721413135528564, "learning_rate": 1.9421698987416685e-06, "loss": 0.6749376058578491, "step": 444 }, { "epoch": 0.4811218985976268, "grad_norm": 3.368656873703003, "learning_rate": 1.941412365262039e-06, "loss": 0.6256532073020935, "step": 446 }, { "epoch": 0.48327939590075514, "grad_norm": 2.6724510192871094, "learning_rate": 1.9406500693193555e-06, "loss": 0.6529517769813538, "step": 448 }, { "epoch": 0.4854368932038835, "grad_norm": 2.0183353424072266, "learning_rate": 1.939883015228276e-06, "loss": 0.7027242183685303, "step": 450 }, { "epoch": 0.48759439050701187, "grad_norm": 2.5906269550323486, "learning_rate": 1.9391112073303897e-06, "loss": 0.6666867733001709, "step": 452 }, { "epoch": 0.48975188781014023, "grad_norm": 3.279174327850342, "learning_rate": 1.9383346499941934e-06, "loss": 0.6068412065505981, "step": 454 }, { "epoch": 0.4919093851132686, "grad_norm": 2.020169496536255, "learning_rate": 1.937553347615064e-06, "loss": 0.49952036142349243, "step": 456 }, { "epoch": 0.49406688241639696, "grad_norm": 1.5360465049743652, "learning_rate": 1.936767304615237e-06, "loss": 0.6741431951522827, "step": 458 }, { "epoch": 0.4962243797195254, "grad_norm": 2.7520928382873535, "learning_rate": 1.935976525443782e-06, "loss": 0.6988986730575562, "step": 460 }, { "epoch": 0.49838187702265374, "grad_norm": 3.1902847290039062, "learning_rate": 1.935181014576573e-06, "loss": 0.6338163614273071, "step": 462 }, { "epoch": 0.5005393743257821, "grad_norm": 2.226433515548706, "learning_rate": 1.934380776516266e-06, "loss": 0.6862495541572571, "step": 464 }, { "epoch": 0.5026968716289104, "grad_norm": 1.4880640506744385, "learning_rate": 1.9335758157922757e-06, "loss": 0.7557521462440491, "step": 466 }, { "epoch": 0.5048543689320388, "grad_norm": 1.4232990741729736, "learning_rate": 1.932766136960745e-06, "loss": 0.675652027130127, "step": 468 }, { "epoch": 0.5070118662351673, "grad_norm": 2.1612069606781006, "learning_rate": 1.931951744604522e-06, "loss": 0.621537446975708, "step": 470 }, { "epoch": 0.5091693635382956, "grad_norm": 1.3291016817092896, "learning_rate": 1.9311326433331355e-06, "loss": 0.6317250728607178, "step": 472 }, { "epoch": 0.511326860841424, "grad_norm": 1.6099094152450562, "learning_rate": 1.9303088377827653e-06, "loss": 0.7552534937858582, "step": 474 }, { "epoch": 0.5134843581445523, "grad_norm": 1.411257028579712, "learning_rate": 1.9294803326162187e-06, "loss": 0.7963615655899048, "step": 476 }, { "epoch": 0.5156418554476807, "grad_norm": 2.952651262283325, "learning_rate": 1.9286471325229026e-06, "loss": 0.7329859733581543, "step": 478 }, { "epoch": 0.517799352750809, "grad_norm": 0.9981465935707092, "learning_rate": 1.9278092422187978e-06, "loss": 0.4232223331928253, "step": 480 }, { "epoch": 0.5199568500539374, "grad_norm": 1.3622761964797974, "learning_rate": 1.926966666446433e-06, "loss": 0.6612151265144348, "step": 482 }, { "epoch": 0.5221143473570659, "grad_norm": 2.9351611137390137, "learning_rate": 1.9261194099748554e-06, "loss": 0.6452651023864746, "step": 484 }, { "epoch": 0.5242718446601942, "grad_norm": 2.3402223587036133, "learning_rate": 1.9252674775996062e-06, "loss": 0.7631157040596008, "step": 486 }, { "epoch": 0.5264293419633226, "grad_norm": 2.175255537033081, "learning_rate": 1.9244108741426933e-06, "loss": 0.6183757781982422, "step": 488 }, { "epoch": 0.5285868392664509, "grad_norm": 1.3048573732376099, "learning_rate": 1.923549604452562e-06, "loss": 0.5466787219047546, "step": 490 }, { "epoch": 0.5307443365695793, "grad_norm": 6.882724761962891, "learning_rate": 1.9226836734040696e-06, "loss": 0.6256377696990967, "step": 492 }, { "epoch": 0.5329018338727076, "grad_norm": 5.080470085144043, "learning_rate": 1.9218130858984566e-06, "loss": 0.7089909315109253, "step": 494 }, { "epoch": 0.535059331175836, "grad_norm": 5.151968955993652, "learning_rate": 1.92093784686332e-06, "loss": 0.5963342785835266, "step": 496 }, { "epoch": 0.5372168284789643, "grad_norm": 2.0868022441864014, "learning_rate": 1.9200579612525847e-06, "loss": 0.7230027318000793, "step": 498 }, { "epoch": 0.5393743257820928, "grad_norm": 1.5343974828720093, "learning_rate": 1.919173434046476e-06, "loss": 0.5582040548324585, "step": 500 }, { "epoch": 0.5415318230852212, "grad_norm": 2.0801985263824463, "learning_rate": 1.9182842702514894e-06, "loss": 0.7240785956382751, "step": 502 }, { "epoch": 0.5436893203883495, "grad_norm": 4.034970760345459, "learning_rate": 1.917390474900365e-06, "loss": 0.6458247900009155, "step": 504 }, { "epoch": 0.5458468176914779, "grad_norm": 1.5025601387023926, "learning_rate": 1.916492053052059e-06, "loss": 0.7182348370552063, "step": 506 }, { "epoch": 0.5480043149946062, "grad_norm": 1.2147194147109985, "learning_rate": 1.915589009791712e-06, "loss": 0.7499125599861145, "step": 508 }, { "epoch": 0.5501618122977346, "grad_norm": 1.183869481086731, "learning_rate": 1.914681350230623e-06, "loss": 0.6138162612915039, "step": 510 }, { "epoch": 0.552319309600863, "grad_norm": 1.6860522031784058, "learning_rate": 1.9137690795062195e-06, "loss": 0.665122389793396, "step": 512 }, { "epoch": 0.5544768069039914, "grad_norm": 2.0282976627349854, "learning_rate": 1.9128522027820286e-06, "loss": 0.6816024780273438, "step": 514 }, { "epoch": 0.5566343042071198, "grad_norm": 4.281038284301758, "learning_rate": 1.911930725247649e-06, "loss": 0.5960591435432434, "step": 516 }, { "epoch": 0.5587918015102481, "grad_norm": 1.3044649362564087, "learning_rate": 1.911004652118718e-06, "loss": 0.7166500687599182, "step": 518 }, { "epoch": 0.5609492988133765, "grad_norm": 4.467653751373291, "learning_rate": 1.9100739886368856e-06, "loss": 0.6787055134773254, "step": 520 }, { "epoch": 0.5631067961165048, "grad_norm": 6.644638538360596, "learning_rate": 1.9091387400697836e-06, "loss": 0.6345533728599548, "step": 522 }, { "epoch": 0.5652642934196332, "grad_norm": 2.0383713245391846, "learning_rate": 1.908198911710996e-06, "loss": 0.432686984539032, "step": 524 }, { "epoch": 0.5674217907227616, "grad_norm": 1.5780389308929443, "learning_rate": 1.9072545088800281e-06, "loss": 0.7076600790023804, "step": 526 }, { "epoch": 0.56957928802589, "grad_norm": 1.6204893589019775, "learning_rate": 1.9063055369222779e-06, "loss": 0.6012558341026306, "step": 528 }, { "epoch": 0.5717367853290184, "grad_norm": 6.985592842102051, "learning_rate": 1.905352001209004e-06, "loss": 0.6433860063552856, "step": 530 }, { "epoch": 0.5738942826321467, "grad_norm": 1.4386237859725952, "learning_rate": 1.9043939071372968e-06, "loss": 0.6871167421340942, "step": 532 }, { "epoch": 0.5760517799352751, "grad_norm": 1.2262943983078003, "learning_rate": 1.9034312601300479e-06, "loss": 0.7119494080543518, "step": 534 }, { "epoch": 0.5782092772384034, "grad_norm": 2.725543975830078, "learning_rate": 1.9024640656359182e-06, "loss": 0.5970579385757446, "step": 536 }, { "epoch": 0.5803667745415318, "grad_norm": 1.5997346639633179, "learning_rate": 1.901492329129308e-06, "loss": 0.6494900584220886, "step": 538 }, { "epoch": 0.5825242718446602, "grad_norm": 3.0856845378875732, "learning_rate": 1.9005160561103253e-06, "loss": 0.7896479368209839, "step": 540 }, { "epoch": 0.5846817691477886, "grad_norm": 2.178337574005127, "learning_rate": 1.8995352521047555e-06, "loss": 0.7269325256347656, "step": 542 }, { "epoch": 0.5868392664509169, "grad_norm": 2.0112404823303223, "learning_rate": 1.8985499226640302e-06, "loss": 0.5430014133453369, "step": 544 }, { "epoch": 0.5889967637540453, "grad_norm": 2.0597128868103027, "learning_rate": 1.897560073365195e-06, "loss": 0.7385756969451904, "step": 546 }, { "epoch": 0.5911542610571737, "grad_norm": 0.3906221091747284, "learning_rate": 1.8965657098108778e-06, "loss": 0.739960253238678, "step": 548 }, { "epoch": 0.593311758360302, "grad_norm": 6.757683753967285, "learning_rate": 1.8955668376292584e-06, "loss": 0.5648355484008789, "step": 550 }, { "epoch": 0.5954692556634305, "grad_norm": 1.4673168659210205, "learning_rate": 1.8945634624740346e-06, "loss": 0.6756861209869385, "step": 552 }, { "epoch": 0.5976267529665588, "grad_norm": 2.3454344272613525, "learning_rate": 1.8935555900243924e-06, "loss": 0.693338930606842, "step": 554 }, { "epoch": 0.5997842502696872, "grad_norm": 3.2340376377105713, "learning_rate": 1.8925432259849734e-06, "loss": 0.6485008001327515, "step": 556 }, { "epoch": 0.6019417475728155, "grad_norm": 3.2170920372009277, "learning_rate": 1.89152637608584e-06, "loss": 0.6817625164985657, "step": 558 }, { "epoch": 0.6040992448759439, "grad_norm": 1.9984098672866821, "learning_rate": 1.8905050460824468e-06, "loss": 0.7717204093933105, "step": 560 }, { "epoch": 0.6062567421790723, "grad_norm": 2.4421756267547607, "learning_rate": 1.8894792417556051e-06, "loss": 0.6852340698242188, "step": 562 }, { "epoch": 0.6084142394822006, "grad_norm": 2.060135841369629, "learning_rate": 1.888448968911452e-06, "loss": 0.7176313996315002, "step": 564 }, { "epoch": 0.6105717367853291, "grad_norm": 3.1218700408935547, "learning_rate": 1.887414233381416e-06, "loss": 0.6021454334259033, "step": 566 }, { "epoch": 0.6127292340884574, "grad_norm": 1.8716174364089966, "learning_rate": 1.8863750410221855e-06, "loss": 0.6650149822235107, "step": 568 }, { "epoch": 0.6148867313915858, "grad_norm": 1.871856689453125, "learning_rate": 1.8853313977156739e-06, "loss": 0.6372621655464172, "step": 570 }, { "epoch": 0.6170442286947141, "grad_norm": 2.7764410972595215, "learning_rate": 1.8842833093689885e-06, "loss": 0.6875618100166321, "step": 572 }, { "epoch": 0.6192017259978425, "grad_norm": 1.4079262018203735, "learning_rate": 1.8832307819143953e-06, "loss": 0.685975968837738, "step": 574 }, { "epoch": 0.6213592233009708, "grad_norm": 5.893849849700928, "learning_rate": 1.8821738213092862e-06, "loss": 0.631260871887207, "step": 576 }, { "epoch": 0.6235167206040992, "grad_norm": 2.4246366024017334, "learning_rate": 1.8811124335361445e-06, "loss": 0.6432245373725891, "step": 578 }, { "epoch": 0.6256742179072277, "grad_norm": 56.797996520996094, "learning_rate": 1.8800466246025129e-06, "loss": 0.6804959177970886, "step": 580 }, { "epoch": 0.627831715210356, "grad_norm": 1.7273633480072021, "learning_rate": 1.8789764005409568e-06, "loss": 0.5822848677635193, "step": 582 }, { "epoch": 0.6299892125134844, "grad_norm": 1.6046229600906372, "learning_rate": 1.8779017674090322e-06, "loss": 0.7005263566970825, "step": 584 }, { "epoch": 0.6321467098166127, "grad_norm": 1.5924113988876343, "learning_rate": 1.8768227312892515e-06, "loss": 0.7687848210334778, "step": 586 }, { "epoch": 0.6343042071197411, "grad_norm": 2.035219430923462, "learning_rate": 1.875739298289047e-06, "loss": 0.5710114240646362, "step": 588 }, { "epoch": 0.6364617044228694, "grad_norm": 2.202737808227539, "learning_rate": 1.8746514745407386e-06, "loss": 0.7539809346199036, "step": 590 }, { "epoch": 0.6386192017259978, "grad_norm": 5.263622760772705, "learning_rate": 1.8735592662014985e-06, "loss": 0.7617581486701965, "step": 592 }, { "epoch": 0.6407766990291263, "grad_norm": 17.145244598388672, "learning_rate": 1.872462679453315e-06, "loss": 0.8727496266365051, "step": 594 }, { "epoch": 0.6429341963322546, "grad_norm": 1.9058817625045776, "learning_rate": 1.871361720502959e-06, "loss": 0.6560637950897217, "step": 596 }, { "epoch": 0.645091693635383, "grad_norm": 2.8487465381622314, "learning_rate": 1.8702563955819493e-06, "loss": 0.5254390835762024, "step": 598 }, { "epoch": 0.6472491909385113, "grad_norm": 2.9062187671661377, "learning_rate": 1.869146710946515e-06, "loss": 0.7910107970237732, "step": 600 }, { "epoch": 0.6494066882416397, "grad_norm": 4.04607629776001, "learning_rate": 1.8680326728775622e-06, "loss": 0.6240645051002502, "step": 602 }, { "epoch": 0.651564185544768, "grad_norm": 1.921399474143982, "learning_rate": 1.866914287680638e-06, "loss": 0.8376886248588562, "step": 604 }, { "epoch": 0.6537216828478964, "grad_norm": 7.570333957672119, "learning_rate": 1.8657915616858946e-06, "loss": 0.7127501368522644, "step": 606 }, { "epoch": 0.6558791801510249, "grad_norm": 1.5097516775131226, "learning_rate": 1.864664501248053e-06, "loss": 0.5545579195022583, "step": 608 }, { "epoch": 0.6580366774541532, "grad_norm": 3.1739096641540527, "learning_rate": 1.8635331127463678e-06, "loss": 0.6854344010353088, "step": 610 }, { "epoch": 0.6601941747572816, "grad_norm": 2.4847121238708496, "learning_rate": 1.8623974025845913e-06, "loss": 0.6225752234458923, "step": 612 }, { "epoch": 0.6623516720604099, "grad_norm": 2.919856071472168, "learning_rate": 1.8612573771909354e-06, "loss": 0.7242894172668457, "step": 614 }, { "epoch": 0.6645091693635383, "grad_norm": 1.5826700925827026, "learning_rate": 1.8601130430180384e-06, "loss": 0.7404430508613586, "step": 616 }, { "epoch": 0.6666666666666666, "grad_norm": 1.9459686279296875, "learning_rate": 1.8589644065429246e-06, "loss": 0.7019950747489929, "step": 618 }, { "epoch": 0.668824163969795, "grad_norm": 2.677245855331421, "learning_rate": 1.8578114742669712e-06, "loss": 0.6545602083206177, "step": 620 }, { "epoch": 0.6709816612729234, "grad_norm": 6.284696578979492, "learning_rate": 1.85665425271587e-06, "loss": 0.4951339364051819, "step": 622 }, { "epoch": 0.6731391585760518, "grad_norm": 1.498757243156433, "learning_rate": 1.8554927484395892e-06, "loss": 0.7832834720611572, "step": 624 }, { "epoch": 0.6752966558791802, "grad_norm": 2.4137635231018066, "learning_rate": 1.8543269680123387e-06, "loss": 0.6441301107406616, "step": 626 }, { "epoch": 0.6774541531823085, "grad_norm": 4.308967590332031, "learning_rate": 1.853156918032531e-06, "loss": 0.7098633050918579, "step": 628 }, { "epoch": 0.6796116504854369, "grad_norm": 2.8264269828796387, "learning_rate": 1.851982605122746e-06, "loss": 0.610696017742157, "step": 630 }, { "epoch": 0.6817691477885652, "grad_norm": 1.2851277589797974, "learning_rate": 1.8508040359296903e-06, "loss": 0.7390373945236206, "step": 632 }, { "epoch": 0.6839266450916937, "grad_norm": 1.6539459228515625, "learning_rate": 1.8496212171241626e-06, "loss": 0.5240519046783447, "step": 634 }, { "epoch": 0.686084142394822, "grad_norm": 1.8807573318481445, "learning_rate": 1.8484341554010143e-06, "loss": 0.4707701504230499, "step": 636 }, { "epoch": 0.6882416396979504, "grad_norm": 2.358454704284668, "learning_rate": 1.8472428574791121e-06, "loss": 0.7253568172454834, "step": 638 }, { "epoch": 0.6903991370010788, "grad_norm": 2.440108299255371, "learning_rate": 1.8460473301013004e-06, "loss": 0.7356727123260498, "step": 640 }, { "epoch": 0.6925566343042071, "grad_norm": 2.899152994155884, "learning_rate": 1.844847580034362e-06, "loss": 0.6664748191833496, "step": 642 }, { "epoch": 0.6947141316073355, "grad_norm": 5.704761028289795, "learning_rate": 1.843643614068981e-06, "loss": 0.7694708108901978, "step": 644 }, { "epoch": 0.6968716289104638, "grad_norm": 1.51004159450531, "learning_rate": 1.842435439019703e-06, "loss": 0.6821762323379517, "step": 646 }, { "epoch": 0.6990291262135923, "grad_norm": 5.242131233215332, "learning_rate": 1.8412230617248988e-06, "loss": 0.6199461221694946, "step": 648 }, { "epoch": 0.7011866235167206, "grad_norm": 2.5778682231903076, "learning_rate": 1.8400064890467229e-06, "loss": 0.6760554313659668, "step": 650 }, { "epoch": 0.703344120819849, "grad_norm": 1.4639006853103638, "learning_rate": 1.8387857278710763e-06, "loss": 0.662639856338501, "step": 652 }, { "epoch": 0.7055016181229773, "grad_norm": 2.5555951595306396, "learning_rate": 1.8375607851075678e-06, "loss": 0.5903278589248657, "step": 654 }, { "epoch": 0.7076591154261057, "grad_norm": 1.839576244354248, "learning_rate": 1.8363316676894743e-06, "loss": 0.659648597240448, "step": 656 }, { "epoch": 0.7098166127292341, "grad_norm": 4.13273811340332, "learning_rate": 1.8350983825737008e-06, "loss": 0.5222451090812683, "step": 658 }, { "epoch": 0.7119741100323624, "grad_norm": 1.8703253269195557, "learning_rate": 1.833860936740742e-06, "loss": 0.7516009211540222, "step": 660 }, { "epoch": 0.7141316073354909, "grad_norm": 1.5587713718414307, "learning_rate": 1.8326193371946435e-06, "loss": 0.6802030801773071, "step": 662 }, { "epoch": 0.7162891046386192, "grad_norm": 1.9971494674682617, "learning_rate": 1.8313735909629605e-06, "loss": 0.5823180675506592, "step": 664 }, { "epoch": 0.7184466019417476, "grad_norm": 3.314469575881958, "learning_rate": 1.8301237050967186e-06, "loss": 0.6089075207710266, "step": 666 }, { "epoch": 0.7206040992448759, "grad_norm": 2.5151665210723877, "learning_rate": 1.8288696866703752e-06, "loss": 0.5487096309661865, "step": 668 }, { "epoch": 0.7227615965480043, "grad_norm": 1.755199909210205, "learning_rate": 1.827611542781777e-06, "loss": 0.6520088911056519, "step": 670 }, { "epoch": 0.7249190938511327, "grad_norm": 2.233076333999634, "learning_rate": 1.826349280552121e-06, "loss": 0.6878398656845093, "step": 672 }, { "epoch": 0.727076591154261, "grad_norm": 2.0914413928985596, "learning_rate": 1.8250829071259162e-06, "loss": 0.6050041317939758, "step": 674 }, { "epoch": 0.7292340884573895, "grad_norm": 3.670649528503418, "learning_rate": 1.8238124296709396e-06, "loss": 0.5783309936523438, "step": 676 }, { "epoch": 0.7313915857605178, "grad_norm": 4.5103559494018555, "learning_rate": 1.8225378553781978e-06, "loss": 0.5625826120376587, "step": 678 }, { "epoch": 0.7335490830636462, "grad_norm": 2.3067467212677, "learning_rate": 1.821259191461886e-06, "loss": 0.6222144365310669, "step": 680 }, { "epoch": 0.7357065803667745, "grad_norm": 4.616910934448242, "learning_rate": 1.819976445159347e-06, "loss": 0.6577675938606262, "step": 682 }, { "epoch": 0.7378640776699029, "grad_norm": 2.574132204055786, "learning_rate": 1.81868962373103e-06, "loss": 0.5882217884063721, "step": 684 }, { "epoch": 0.7400215749730313, "grad_norm": 1.4304159879684448, "learning_rate": 1.8173987344604505e-06, "loss": 0.7386992573738098, "step": 686 }, { "epoch": 0.7421790722761596, "grad_norm": 2.1306235790252686, "learning_rate": 1.816103784654147e-06, "loss": 0.586725115776062, "step": 688 }, { "epoch": 0.7443365695792881, "grad_norm": 1.9864001274108887, "learning_rate": 1.814804781641642e-06, "loss": 0.5822692513465881, "step": 690 }, { "epoch": 0.7464940668824164, "grad_norm": 1.6799951791763306, "learning_rate": 1.8135017327753992e-06, "loss": 0.630893886089325, "step": 692 }, { "epoch": 0.7486515641855448, "grad_norm": 1.0661367177963257, "learning_rate": 1.8121946454307816e-06, "loss": 0.682563066482544, "step": 694 }, { "epoch": 0.7508090614886731, "grad_norm": 4.472043514251709, "learning_rate": 1.8108835270060122e-06, "loss": 0.6360002756118774, "step": 696 }, { "epoch": 0.7529665587918015, "grad_norm": 1.2949084043502808, "learning_rate": 1.8095683849221276e-06, "loss": 0.6381992101669312, "step": 698 }, { "epoch": 0.7551240560949298, "grad_norm": 1.5483993291854858, "learning_rate": 1.8082492266229404e-06, "loss": 0.7825127243995667, "step": 700 }, { "epoch": 0.7572815533980582, "grad_norm": 2.104930877685547, "learning_rate": 1.806926059574995e-06, "loss": 0.5905802845954895, "step": 702 }, { "epoch": 0.7594390507011867, "grad_norm": 2.691180467605591, "learning_rate": 1.805598891267525e-06, "loss": 0.6105803847312927, "step": 704 }, { "epoch": 0.761596548004315, "grad_norm": 2.662587881088257, "learning_rate": 1.8042677292124127e-06, "loss": 0.7156485319137573, "step": 706 }, { "epoch": 0.7637540453074434, "grad_norm": 4.052894115447998, "learning_rate": 1.802932580944144e-06, "loss": 0.6582145690917969, "step": 708 }, { "epoch": 0.7659115426105717, "grad_norm": 1.772103190422058, "learning_rate": 1.801593454019768e-06, "loss": 0.5497456789016724, "step": 710 }, { "epoch": 0.7680690399137001, "grad_norm": 4.38840913772583, "learning_rate": 1.8002503560188531e-06, "loss": 0.8528274893760681, "step": 712 }, { "epoch": 0.7702265372168284, "grad_norm": 0.47714903950691223, "learning_rate": 1.798903294543444e-06, "loss": 0.6722896695137024, "step": 714 }, { "epoch": 0.7723840345199569, "grad_norm": 1.1433959007263184, "learning_rate": 1.797552277218019e-06, "loss": 0.640397310256958, "step": 716 }, { "epoch": 0.7745415318230853, "grad_norm": 2.8816723823547363, "learning_rate": 1.7961973116894475e-06, "loss": 0.43922677636146545, "step": 718 }, { "epoch": 0.7766990291262136, "grad_norm": 1.3017030954360962, "learning_rate": 1.7948384056269452e-06, "loss": 0.6236469745635986, "step": 720 }, { "epoch": 0.778856526429342, "grad_norm": 5.502106189727783, "learning_rate": 1.7934755667220324e-06, "loss": 0.6106448769569397, "step": 722 }, { "epoch": 0.7810140237324703, "grad_norm": 3.15694522857666, "learning_rate": 1.7921088026884895e-06, "loss": 0.7106237411499023, "step": 724 }, { "epoch": 0.7831715210355987, "grad_norm": 1.253527283668518, "learning_rate": 1.7907381212623119e-06, "loss": 0.6325215101242065, "step": 726 }, { "epoch": 0.785329018338727, "grad_norm": 5.992726802825928, "learning_rate": 1.7893635302016699e-06, "loss": 0.698371946811676, "step": 728 }, { "epoch": 0.7874865156418555, "grad_norm": 2.6129043102264404, "learning_rate": 1.7879850372868614e-06, "loss": 0.8592634797096252, "step": 730 }, { "epoch": 0.7896440129449838, "grad_norm": 1.9722578525543213, "learning_rate": 1.7866026503202696e-06, "loss": 0.7127001881599426, "step": 732 }, { "epoch": 0.7918015102481122, "grad_norm": 2.3035688400268555, "learning_rate": 1.7852163771263183e-06, "loss": 0.7264171242713928, "step": 734 }, { "epoch": 0.7939590075512406, "grad_norm": 1.6729274988174438, "learning_rate": 1.7838262255514273e-06, "loss": 0.6522683501243591, "step": 736 }, { "epoch": 0.7961165048543689, "grad_norm": 1.2953232526779175, "learning_rate": 1.7824322034639688e-06, "loss": 0.7508292198181152, "step": 738 }, { "epoch": 0.7982740021574973, "grad_norm": 1.8854900598526, "learning_rate": 1.781034318754222e-06, "loss": 0.8205673098564148, "step": 740 }, { "epoch": 0.8004314994606256, "grad_norm": 2.379824161529541, "learning_rate": 1.7796325793343296e-06, "loss": 0.627574622631073, "step": 742 }, { "epoch": 0.8025889967637541, "grad_norm": 1.9717144966125488, "learning_rate": 1.7782269931382514e-06, "loss": 0.41914719343185425, "step": 744 }, { "epoch": 0.8047464940668824, "grad_norm": 3.594667911529541, "learning_rate": 1.7768175681217208e-06, "loss": 0.40262705087661743, "step": 746 }, { "epoch": 0.8069039913700108, "grad_norm": 1.6693843603134155, "learning_rate": 1.7754043122621986e-06, "loss": 0.6387592554092407, "step": 748 }, { "epoch": 0.8090614886731392, "grad_norm": 5.557443141937256, "learning_rate": 1.7739872335588298e-06, "loss": 0.6391375064849854, "step": 750 }, { "epoch": 0.8112189859762675, "grad_norm": 1.475829839706421, "learning_rate": 1.7725663400323957e-06, "loss": 0.5560780167579651, "step": 752 }, { "epoch": 0.8133764832793959, "grad_norm": 3.5974369049072266, "learning_rate": 1.77114163972527e-06, "loss": 0.7343906164169312, "step": 754 }, { "epoch": 0.8155339805825242, "grad_norm": 1.595281720161438, "learning_rate": 1.769713140701374e-06, "loss": 0.6695587038993835, "step": 756 }, { "epoch": 0.8176914778856527, "grad_norm": 1.641003131866455, "learning_rate": 1.7682808510461292e-06, "loss": 0.7364107370376587, "step": 758 }, { "epoch": 0.819848975188781, "grad_norm": 1.8976866006851196, "learning_rate": 1.7668447788664126e-06, "loss": 0.5367798209190369, "step": 760 }, { "epoch": 0.8220064724919094, "grad_norm": 2.350424289703369, "learning_rate": 1.7654049322905105e-06, "loss": 0.6110427379608154, "step": 762 }, { "epoch": 0.8241639697950378, "grad_norm": 1.7859790325164795, "learning_rate": 1.7639613194680727e-06, "loss": 0.8835413455963135, "step": 764 }, { "epoch": 0.8263214670981661, "grad_norm": 1.9460476636886597, "learning_rate": 1.7625139485700664e-06, "loss": 0.5881315469741821, "step": 766 }, { "epoch": 0.8284789644012945, "grad_norm": 17.784387588500977, "learning_rate": 1.7610628277887297e-06, "loss": 0.5561118721961975, "step": 768 }, { "epoch": 0.8306364617044228, "grad_norm": 2.5915396213531494, "learning_rate": 1.7596079653375253e-06, "loss": 0.6103290319442749, "step": 770 }, { "epoch": 0.8327939590075513, "grad_norm": 7.325887680053711, "learning_rate": 1.758149369451094e-06, "loss": 0.52987140417099, "step": 772 }, { "epoch": 0.8349514563106796, "grad_norm": 3.882723093032837, "learning_rate": 1.7566870483852086e-06, "loss": 0.7465340495109558, "step": 774 }, { "epoch": 0.837108953613808, "grad_norm": 5.062621593475342, "learning_rate": 1.7552210104167257e-06, "loss": 0.6753080487251282, "step": 776 }, { "epoch": 0.8392664509169363, "grad_norm": 6.415841102600098, "learning_rate": 1.753751263843541e-06, "loss": 0.693338930606842, "step": 778 }, { "epoch": 0.8414239482200647, "grad_norm": 2.172607898712158, "learning_rate": 1.7522778169845408e-06, "loss": 0.7129068374633789, "step": 780 }, { "epoch": 0.8435814455231931, "grad_norm": 2.3066418170928955, "learning_rate": 1.7508006781795555e-06, "loss": 0.5250005722045898, "step": 782 }, { "epoch": 0.8457389428263214, "grad_norm": 1.514641523361206, "learning_rate": 1.7493198557893109e-06, "loss": 0.5880756378173828, "step": 784 }, { "epoch": 0.8478964401294499, "grad_norm": 2.358647584915161, "learning_rate": 1.7478353581953846e-06, "loss": 0.6020887494087219, "step": 786 }, { "epoch": 0.8500539374325782, "grad_norm": 2.5027408599853516, "learning_rate": 1.746347193800154e-06, "loss": 0.7379757165908813, "step": 788 }, { "epoch": 0.8522114347357066, "grad_norm": 1.7015382051467896, "learning_rate": 1.7448553710267519e-06, "loss": 0.3867076337337494, "step": 790 }, { "epoch": 0.8543689320388349, "grad_norm": 24.41814613342285, "learning_rate": 1.7433598983190181e-06, "loss": 0.5596577525138855, "step": 792 }, { "epoch": 0.8565264293419633, "grad_norm": 0.5729015469551086, "learning_rate": 1.74186078414145e-06, "loss": 0.37773168087005615, "step": 794 }, { "epoch": 0.8586839266450917, "grad_norm": 1.438088297843933, "learning_rate": 1.7403580369791577e-06, "loss": 0.6138755679130554, "step": 796 }, { "epoch": 0.86084142394822, "grad_norm": 1.3894504308700562, "learning_rate": 1.7388516653378134e-06, "loss": 0.6411980986595154, "step": 798 }, { "epoch": 0.8629989212513485, "grad_norm": 7.195361137390137, "learning_rate": 1.7373416777436036e-06, "loss": 0.5361164808273315, "step": 800 }, { "epoch": 0.8651564185544768, "grad_norm": 3.299745798110962, "learning_rate": 1.7358280827431829e-06, "loss": 0.45560529828071594, "step": 802 }, { "epoch": 0.8673139158576052, "grad_norm": 2.5834922790527344, "learning_rate": 1.7343108889036223e-06, "loss": 0.5199063420295715, "step": 804 }, { "epoch": 0.8694714131607335, "grad_norm": 2.0384316444396973, "learning_rate": 1.7327901048123644e-06, "loss": 0.6027982234954834, "step": 806 }, { "epoch": 0.8716289104638619, "grad_norm": 3.682217597961426, "learning_rate": 1.7312657390771714e-06, "loss": 0.6176765561103821, "step": 808 }, { "epoch": 0.8737864077669902, "grad_norm": 3.4343974590301514, "learning_rate": 1.7297378003260787e-06, "loss": 0.6307402849197388, "step": 810 }, { "epoch": 0.8759439050701187, "grad_norm": 18.784271240234375, "learning_rate": 1.728206297207345e-06, "loss": 0.4677152633666992, "step": 812 }, { "epoch": 0.8781014023732471, "grad_norm": 1.2662842273712158, "learning_rate": 1.7266712383894037e-06, "loss": 0.6467829346656799, "step": 814 }, { "epoch": 0.8802588996763754, "grad_norm": 1.4935745000839233, "learning_rate": 1.7251326325608135e-06, "loss": 0.6746770143508911, "step": 816 }, { "epoch": 0.8824163969795038, "grad_norm": 3.506131410598755, "learning_rate": 1.7235904884302098e-06, "loss": 0.6060282588005066, "step": 818 }, { "epoch": 0.8845738942826321, "grad_norm": 3.4990806579589844, "learning_rate": 1.7220448147262555e-06, "loss": 0.5744661688804626, "step": 820 }, { "epoch": 0.8867313915857605, "grad_norm": 1.9610271453857422, "learning_rate": 1.7204956201975898e-06, "loss": 0.6914322376251221, "step": 822 }, { "epoch": 0.8888888888888888, "grad_norm": 2.512073040008545, "learning_rate": 1.7189429136127814e-06, "loss": 0.6700202226638794, "step": 824 }, { "epoch": 0.8910463861920173, "grad_norm": 2.086268663406372, "learning_rate": 1.7173867037602767e-06, "loss": 0.7067221403121948, "step": 826 }, { "epoch": 0.8932038834951457, "grad_norm": 3.7312817573547363, "learning_rate": 1.7158269994483514e-06, "loss": 0.31625503301620483, "step": 828 }, { "epoch": 0.895361380798274, "grad_norm": 6.126044750213623, "learning_rate": 1.71426380950506e-06, "loss": 0.5323830842971802, "step": 830 }, { "epoch": 0.8975188781014024, "grad_norm": 1.7195242643356323, "learning_rate": 1.712697142778186e-06, "loss": 0.782951831817627, "step": 832 }, { "epoch": 0.8996763754045307, "grad_norm": 8.366249084472656, "learning_rate": 1.7111270081351913e-06, "loss": 0.5681637525558472, "step": 834 }, { "epoch": 0.9018338727076591, "grad_norm": 2.791904926300049, "learning_rate": 1.7095534144631668e-06, "loss": 0.7307286858558655, "step": 836 }, { "epoch": 0.9039913700107874, "grad_norm": 1.9204684495925903, "learning_rate": 1.7079763706687827e-06, "loss": 0.6743446588516235, "step": 838 }, { "epoch": 0.9061488673139159, "grad_norm": 5.066476821899414, "learning_rate": 1.706395885678235e-06, "loss": 0.6655571460723877, "step": 840 }, { "epoch": 0.9083063646170443, "grad_norm": 4.142644882202148, "learning_rate": 1.7048119684371996e-06, "loss": 0.6895488500595093, "step": 842 }, { "epoch": 0.9104638619201726, "grad_norm": 7.594639778137207, "learning_rate": 1.7032246279107776e-06, "loss": 0.8503600358963013, "step": 844 }, { "epoch": 0.912621359223301, "grad_norm": 8.625396728515625, "learning_rate": 1.7016338730834468e-06, "loss": 0.8498875498771667, "step": 846 }, { "epoch": 0.9147788565264293, "grad_norm": 2.5336923599243164, "learning_rate": 1.7000397129590104e-06, "loss": 0.49179524183273315, "step": 848 }, { "epoch": 0.9169363538295577, "grad_norm": 12.546621322631836, "learning_rate": 1.6984421565605447e-06, "loss": 0.7858133912086487, "step": 850 }, { "epoch": 0.919093851132686, "grad_norm": 1.803154706954956, "learning_rate": 1.696841212930351e-06, "loss": 0.42831236124038696, "step": 852 }, { "epoch": 0.9212513484358145, "grad_norm": 1.1497598886489868, "learning_rate": 1.695236891129901e-06, "loss": 0.6902183294296265, "step": 854 }, { "epoch": 0.9234088457389428, "grad_norm": 0.7733110785484314, "learning_rate": 1.6936292002397876e-06, "loss": 0.7910528182983398, "step": 856 }, { "epoch": 0.9255663430420712, "grad_norm": 4.334436893463135, "learning_rate": 1.692018149359674e-06, "loss": 0.6410449743270874, "step": 858 }, { "epoch": 0.9277238403451996, "grad_norm": 3.1473817825317383, "learning_rate": 1.6904037476082403e-06, "loss": 0.5418177247047424, "step": 860 }, { "epoch": 0.9298813376483279, "grad_norm": 3.289321184158325, "learning_rate": 1.6887860041231324e-06, "loss": 0.8675633072853088, "step": 862 }, { "epoch": 0.9320388349514563, "grad_norm": 0.5947059392929077, "learning_rate": 1.6871649280609114e-06, "loss": 0.7250087857246399, "step": 864 }, { "epoch": 0.9341963322545846, "grad_norm": 1.812920331954956, "learning_rate": 1.6855405285970012e-06, "loss": 0.3274366855621338, "step": 866 }, { "epoch": 0.9363538295577131, "grad_norm": 1.37776780128479, "learning_rate": 1.6839128149256357e-06, "loss": 0.7339057326316833, "step": 868 }, { "epoch": 0.9385113268608414, "grad_norm": 2.6405365467071533, "learning_rate": 1.6822817962598079e-06, "loss": 0.4312754273414612, "step": 870 }, { "epoch": 0.9406688241639698, "grad_norm": 32.455711364746094, "learning_rate": 1.6806474818312178e-06, "loss": 0.6649459600448608, "step": 872 }, { "epoch": 0.9428263214670982, "grad_norm": 1.1529123783111572, "learning_rate": 1.6790098808902187e-06, "loss": 0.694479763507843, "step": 874 }, { "epoch": 0.9449838187702265, "grad_norm": 6.395750045776367, "learning_rate": 1.6773690027057665e-06, "loss": 0.5320945978164673, "step": 876 }, { "epoch": 0.9471413160733549, "grad_norm": 12.70807933807373, "learning_rate": 1.6757248565653666e-06, "loss": 0.7014382481575012, "step": 878 }, { "epoch": 0.9492988133764833, "grad_norm": 2.392099142074585, "learning_rate": 1.674077451775021e-06, "loss": 0.9157409071922302, "step": 880 }, { "epoch": 0.9514563106796117, "grad_norm": 1.741999864578247, "learning_rate": 1.6724267976591756e-06, "loss": 0.616689145565033, "step": 882 }, { "epoch": 0.95361380798274, "grad_norm": 1.99687922000885, "learning_rate": 1.6707729035606691e-06, "loss": 0.5802426338195801, "step": 884 }, { "epoch": 0.9557713052858684, "grad_norm": 1.6920971870422363, "learning_rate": 1.6691157788406773e-06, "loss": 0.42533692717552185, "step": 886 }, { "epoch": 0.9579288025889967, "grad_norm": 3.21420955657959, "learning_rate": 1.6674554328786616e-06, "loss": 0.8310537338256836, "step": 888 }, { "epoch": 0.9600862998921251, "grad_norm": 2.7011826038360596, "learning_rate": 1.6657918750723176e-06, "loss": 0.8436251282691956, "step": 890 }, { "epoch": 0.9622437971952535, "grad_norm": 1.146492838859558, "learning_rate": 1.6641251148375184e-06, "loss": 0.4956342577934265, "step": 892 }, { "epoch": 0.9644012944983819, "grad_norm": 11.642980575561523, "learning_rate": 1.6624551616082635e-06, "loss": 0.643322765827179, "step": 894 }, { "epoch": 0.9665587918015103, "grad_norm": 1.810482144355774, "learning_rate": 1.6607820248366257e-06, "loss": 0.6843705177307129, "step": 896 }, { "epoch": 0.9687162891046386, "grad_norm": 1.3741674423217773, "learning_rate": 1.6591057139926966e-06, "loss": 0.7010579109191895, "step": 898 }, { "epoch": 0.970873786407767, "grad_norm": 3.2511136531829834, "learning_rate": 1.6574262385645323e-06, "loss": 0.6527800559997559, "step": 900 }, { "epoch": 0.9730312837108953, "grad_norm": 3.3792011737823486, "learning_rate": 1.6557436080581027e-06, "loss": 0.6726928949356079, "step": 902 }, { "epoch": 0.9751887810140237, "grad_norm": 1.9965680837631226, "learning_rate": 1.6540578319972335e-06, "loss": 0.7932605147361755, "step": 904 }, { "epoch": 0.9773462783171522, "grad_norm": 14.346431732177734, "learning_rate": 1.652368919923557e-06, "loss": 0.6329518556594849, "step": 906 }, { "epoch": 0.9795037756202805, "grad_norm": 1.1356829404830933, "learning_rate": 1.6506768813964527e-06, "loss": 0.6013335585594177, "step": 908 }, { "epoch": 0.9816612729234089, "grad_norm": 1.9456955194473267, "learning_rate": 1.6489817259929978e-06, "loss": 0.6943175792694092, "step": 910 }, { "epoch": 0.9838187702265372, "grad_norm": 75.11457824707031, "learning_rate": 1.647283463307912e-06, "loss": 0.499568372964859, "step": 912 }, { "epoch": 0.9859762675296656, "grad_norm": 2.1002883911132812, "learning_rate": 1.6455821029535006e-06, "loss": 0.6039252281188965, "step": 914 }, { "epoch": 0.9881337648327939, "grad_norm": 2.215057849884033, "learning_rate": 1.6438776545596032e-06, "loss": 0.6023073196411133, "step": 916 }, { "epoch": 0.9902912621359223, "grad_norm": 5.074563980102539, "learning_rate": 1.6421701277735377e-06, "loss": 0.6670839190483093, "step": 918 }, { "epoch": 0.9924487594390508, "grad_norm": 1.9246139526367188, "learning_rate": 1.6404595322600454e-06, "loss": 0.45060187578201294, "step": 920 }, { "epoch": 0.9946062567421791, "grad_norm": 2.673752784729004, "learning_rate": 1.638745877701238e-06, "loss": 0.5095839500427246, "step": 922 }, { "epoch": 0.9967637540453075, "grad_norm": 1.1209965944290161, "learning_rate": 1.6370291737965403e-06, "loss": 0.6856327652931213, "step": 924 }, { "epoch": 0.9989212513484358, "grad_norm": 1.4867414236068726, "learning_rate": 1.6353094302626375e-06, "loss": 0.7345451712608337, "step": 926 }, { "epoch": 1.0010787486515642, "grad_norm": 1.4351245164871216, "learning_rate": 1.6335866568334196e-06, "loss": 0.4384617805480957, "step": 928 }, { "epoch": 1.0032362459546926, "grad_norm": 1.608482837677002, "learning_rate": 1.6318608632599252e-06, "loss": 0.5233771800994873, "step": 930 }, { "epoch": 1.0053937432578208, "grad_norm": 1.6175332069396973, "learning_rate": 1.6301320593102877e-06, "loss": 0.5526682734489441, "step": 932 }, { "epoch": 1.0075512405609492, "grad_norm": 1.7772831916809082, "learning_rate": 1.6284002547696794e-06, "loss": 0.5304218530654907, "step": 934 }, { "epoch": 1.0097087378640777, "grad_norm": 1.2558060884475708, "learning_rate": 1.626665459440256e-06, "loss": 0.3196244239807129, "step": 936 }, { "epoch": 1.011866235167206, "grad_norm": 4.1625471115112305, "learning_rate": 1.6249276831411015e-06, "loss": 0.49367865920066833, "step": 938 }, { "epoch": 1.0140237324703345, "grad_norm": 2.80029296875, "learning_rate": 1.6231869357081726e-06, "loss": 0.5806005597114563, "step": 940 }, { "epoch": 1.0161812297734627, "grad_norm": 2.1595256328582764, "learning_rate": 1.6214432269942426e-06, "loss": 0.558415412902832, "step": 942 }, { "epoch": 1.0183387270765911, "grad_norm": 4.51309061050415, "learning_rate": 1.6196965668688455e-06, "loss": 0.4171544909477234, "step": 944 }, { "epoch": 1.0204962243797195, "grad_norm": 9.326614379882812, "learning_rate": 1.6179469652182215e-06, "loss": 0.49132904410362244, "step": 946 }, { "epoch": 1.022653721682848, "grad_norm": 2.721449613571167, "learning_rate": 1.6161944319452599e-06, "loss": 0.526667058467865, "step": 948 }, { "epoch": 1.0248112189859762, "grad_norm": 8.595200538635254, "learning_rate": 1.6144389769694418e-06, "loss": 0.519080400466919, "step": 950 }, { "epoch": 1.0269687162891046, "grad_norm": 1.9319312572479248, "learning_rate": 1.6126806102267871e-06, "loss": 0.4982292950153351, "step": 952 }, { "epoch": 1.029126213592233, "grad_norm": 2.2782833576202393, "learning_rate": 1.6109193416697962e-06, "loss": 0.47339990735054016, "step": 954 }, { "epoch": 1.0312837108953614, "grad_norm": 1.7561050653457642, "learning_rate": 1.609155181267393e-06, "loss": 0.4229566156864166, "step": 956 }, { "epoch": 1.0334412081984898, "grad_norm": 2.3419620990753174, "learning_rate": 1.6073881390048708e-06, "loss": 0.5675852298736572, "step": 958 }, { "epoch": 1.035598705501618, "grad_norm": 4.762004852294922, "learning_rate": 1.6056182248838333e-06, "loss": 0.47640660405158997, "step": 960 }, { "epoch": 1.0377562028047465, "grad_norm": 1.654563307762146, "learning_rate": 1.6038454489221401e-06, "loss": 0.39150771498680115, "step": 962 }, { "epoch": 1.0399137001078749, "grad_norm": 6.535782337188721, "learning_rate": 1.6020698211538485e-06, "loss": 0.43942204117774963, "step": 964 }, { "epoch": 1.0420711974110033, "grad_norm": 1.442032814025879, "learning_rate": 1.6002913516291575e-06, "loss": 0.3959490954875946, "step": 966 }, { "epoch": 1.0442286947141317, "grad_norm": 2.8204493522644043, "learning_rate": 1.5985100504143508e-06, "loss": 0.46986186504364014, "step": 968 }, { "epoch": 1.04638619201726, "grad_norm": 5.788197994232178, "learning_rate": 1.596725927591739e-06, "loss": 0.5587306022644043, "step": 970 }, { "epoch": 1.0485436893203883, "grad_norm": 3.22556209564209, "learning_rate": 1.594938993259604e-06, "loss": 0.42848098278045654, "step": 972 }, { "epoch": 1.0507011866235167, "grad_norm": 5.465310573577881, "learning_rate": 1.5931492575321405e-06, "loss": 0.3230629861354828, "step": 974 }, { "epoch": 1.0528586839266452, "grad_norm": 2.285598039627075, "learning_rate": 1.5913567305394004e-06, "loss": 0.4129447937011719, "step": 976 }, { "epoch": 1.0550161812297734, "grad_norm": 2.510387659072876, "learning_rate": 1.5895614224272329e-06, "loss": 0.5222740173339844, "step": 978 }, { "epoch": 1.0571736785329018, "grad_norm": 3.7488322257995605, "learning_rate": 1.5877633433572293e-06, "loss": 0.47047188878059387, "step": 980 }, { "epoch": 1.0593311758360302, "grad_norm": 1.3235845565795898, "learning_rate": 1.5859625035066652e-06, "loss": 0.4286286234855652, "step": 982 }, { "epoch": 1.0614886731391586, "grad_norm": 1.2796275615692139, "learning_rate": 1.5841589130684417e-06, "loss": 0.411946564912796, "step": 984 }, { "epoch": 1.063646170442287, "grad_norm": 2.5920588970184326, "learning_rate": 1.5823525822510282e-06, "loss": 0.4910277724266052, "step": 986 }, { "epoch": 1.0658036677454152, "grad_norm": 13.590333938598633, "learning_rate": 1.5805435212784066e-06, "loss": 0.381788045167923, "step": 988 }, { "epoch": 1.0679611650485437, "grad_norm": 1.8935883045196533, "learning_rate": 1.5787317403900095e-06, "loss": 0.4319833517074585, "step": 990 }, { "epoch": 1.070118662351672, "grad_norm": 1.8740428686141968, "learning_rate": 1.5769172498406657e-06, "loss": 0.5537865161895752, "step": 992 }, { "epoch": 1.0722761596548005, "grad_norm": 2.8530309200286865, "learning_rate": 1.5751000599005411e-06, "loss": 0.45889872312545776, "step": 994 }, { "epoch": 1.074433656957929, "grad_norm": 3.0372843742370605, "learning_rate": 1.573280180855079e-06, "loss": 0.4843668043613434, "step": 996 }, { "epoch": 1.0765911542610571, "grad_norm": 1.9461435079574585, "learning_rate": 1.571457623004945e-06, "loss": 0.3833789527416229, "step": 998 }, { "epoch": 1.0787486515641855, "grad_norm": 4.167815208435059, "learning_rate": 1.5696323966659659e-06, "loss": 0.7622794508934021, "step": 1000 }, { "epoch": 1.080906148867314, "grad_norm": 2.6408567428588867, "learning_rate": 1.5678045121690723e-06, "loss": 0.38144806027412415, "step": 1002 }, { "epoch": 1.0830636461704424, "grad_norm": 1.6359201669692993, "learning_rate": 1.5659739798602412e-06, "loss": 0.5962096452713013, "step": 1004 }, { "epoch": 1.0852211434735706, "grad_norm": 1.9073861837387085, "learning_rate": 1.5641408101004348e-06, "loss": 0.5042172074317932, "step": 1006 }, { "epoch": 1.087378640776699, "grad_norm": 1.1828426122665405, "learning_rate": 1.5623050132655452e-06, "loss": 0.4065170884132385, "step": 1008 }, { "epoch": 1.0895361380798274, "grad_norm": 1.109727144241333, "learning_rate": 1.5604665997463326e-06, "loss": 0.3995954990386963, "step": 1010 }, { "epoch": 1.0916936353829558, "grad_norm": 2.5301997661590576, "learning_rate": 1.5586255799483685e-06, "loss": 0.4737590253353119, "step": 1012 }, { "epoch": 1.0938511326860842, "grad_norm": 3.5179555416107178, "learning_rate": 1.5567819642919768e-06, "loss": 0.3755728006362915, "step": 1014 }, { "epoch": 1.0960086299892124, "grad_norm": 2.1552042961120605, "learning_rate": 1.5549357632121722e-06, "loss": 0.5351279973983765, "step": 1016 }, { "epoch": 1.0981661272923409, "grad_norm": 1.9615085124969482, "learning_rate": 1.5530869871586058e-06, "loss": 0.480570912361145, "step": 1018 }, { "epoch": 1.1003236245954693, "grad_norm": 5.5772552490234375, "learning_rate": 1.5512356465955008e-06, "loss": 0.4701279103755951, "step": 1020 }, { "epoch": 1.1024811218985977, "grad_norm": 2.0782828330993652, "learning_rate": 1.5493817520015969e-06, "loss": 0.6023370027542114, "step": 1022 }, { "epoch": 1.104638619201726, "grad_norm": 4.706164360046387, "learning_rate": 1.5475253138700899e-06, "loss": 0.4403872489929199, "step": 1024 }, { "epoch": 1.1067961165048543, "grad_norm": 2.136815309524536, "learning_rate": 1.5456663427085716e-06, "loss": 0.49264582991600037, "step": 1026 }, { "epoch": 1.1089536138079827, "grad_norm": 2.051373243331909, "learning_rate": 1.543804849038972e-06, "loss": 0.4840565621852875, "step": 1028 }, { "epoch": 1.1111111111111112, "grad_norm": 2.1924808025360107, "learning_rate": 1.5419408433974974e-06, "loss": 0.49226483702659607, "step": 1030 }, { "epoch": 1.1132686084142396, "grad_norm": 3.719738245010376, "learning_rate": 1.5400743363345733e-06, "loss": 0.429510235786438, "step": 1032 }, { "epoch": 1.1154261057173678, "grad_norm": 1.6024198532104492, "learning_rate": 1.5382053384147828e-06, "loss": 0.5755860805511475, "step": 1034 }, { "epoch": 1.1175836030204962, "grad_norm": 5.685046672821045, "learning_rate": 1.5363338602168072e-06, "loss": 0.40157079696655273, "step": 1036 }, { "epoch": 1.1197411003236246, "grad_norm": 1.610744833946228, "learning_rate": 1.5344599123333671e-06, "loss": 0.4434182643890381, "step": 1038 }, { "epoch": 1.121898597626753, "grad_norm": 23.230365753173828, "learning_rate": 1.532583505371161e-06, "loss": 0.4990198314189911, "step": 1040 }, { "epoch": 1.1240560949298812, "grad_norm": 1.455960988998413, "learning_rate": 1.5307046499508066e-06, "loss": 0.4062468409538269, "step": 1042 }, { "epoch": 1.1262135922330097, "grad_norm": 1.460098385810852, "learning_rate": 1.5288233567067794e-06, "loss": 0.45499229431152344, "step": 1044 }, { "epoch": 1.128371089536138, "grad_norm": 1.3446215391159058, "learning_rate": 1.5269396362873542e-06, "loss": 0.4300175905227661, "step": 1046 }, { "epoch": 1.1305285868392665, "grad_norm": 1.6477187871932983, "learning_rate": 1.5250534993545426e-06, "loss": 0.4830603301525116, "step": 1048 }, { "epoch": 1.132686084142395, "grad_norm": 2.070373296737671, "learning_rate": 1.523164956584035e-06, "loss": 0.47534123063087463, "step": 1050 }, { "epoch": 1.134843581445523, "grad_norm": 2.0876166820526123, "learning_rate": 1.5212740186651378e-06, "loss": 0.4968222975730896, "step": 1052 }, { "epoch": 1.1370010787486515, "grad_norm": 1.7046785354614258, "learning_rate": 1.5193806963007156e-06, "loss": 0.4516274034976959, "step": 1054 }, { "epoch": 1.13915857605178, "grad_norm": 6.408827781677246, "learning_rate": 1.517485000207128e-06, "loss": 0.45875146985054016, "step": 1056 }, { "epoch": 1.1413160733549084, "grad_norm": 1.6034789085388184, "learning_rate": 1.5155869411141704e-06, "loss": 0.5700262188911438, "step": 1058 }, { "epoch": 1.1434735706580366, "grad_norm": 9.753545761108398, "learning_rate": 1.5136865297650134e-06, "loss": 0.3870803117752075, "step": 1060 }, { "epoch": 1.145631067961165, "grad_norm": 2.6454174518585205, "learning_rate": 1.511783776916141e-06, "loss": 0.1962374895811081, "step": 1062 }, { "epoch": 1.1477885652642934, "grad_norm": 9.807194709777832, "learning_rate": 1.5098786933372907e-06, "loss": 0.3792603611946106, "step": 1064 }, { "epoch": 1.1499460625674218, "grad_norm": 1.371470332145691, "learning_rate": 1.5079712898113916e-06, "loss": 0.4742359519004822, "step": 1066 }, { "epoch": 1.1521035598705502, "grad_norm": 9.515076637268066, "learning_rate": 1.5060615771345045e-06, "loss": 0.49537792801856995, "step": 1068 }, { "epoch": 1.1542610571736784, "grad_norm": 3.214311361312866, "learning_rate": 1.50414956611576e-06, "loss": 0.5695366859436035, "step": 1070 }, { "epoch": 1.1564185544768069, "grad_norm": 3.578993797302246, "learning_rate": 1.5022352675772967e-06, "loss": 0.4019346535205841, "step": 1072 }, { "epoch": 1.1585760517799353, "grad_norm": 1.514540195465088, "learning_rate": 1.5003186923542022e-06, "loss": 0.4417833089828491, "step": 1074 }, { "epoch": 1.1607335490830637, "grad_norm": 1.5279725790023804, "learning_rate": 1.4983998512944497e-06, "loss": 0.40684929490089417, "step": 1076 }, { "epoch": 1.162891046386192, "grad_norm": 2.6913864612579346, "learning_rate": 1.4964787552588364e-06, "loss": 0.6169437766075134, "step": 1078 }, { "epoch": 1.1650485436893203, "grad_norm": 6.149393558502197, "learning_rate": 1.4945554151209241e-06, "loss": 0.4913300573825836, "step": 1080 }, { "epoch": 1.1672060409924487, "grad_norm": 3.6629035472869873, "learning_rate": 1.4926298417669757e-06, "loss": 0.4479219615459442, "step": 1082 }, { "epoch": 1.1693635382955772, "grad_norm": 2.302075147628784, "learning_rate": 1.4907020460958943e-06, "loss": 0.4335775077342987, "step": 1084 }, { "epoch": 1.1715210355987056, "grad_norm": 1.0914833545684814, "learning_rate": 1.488772039019162e-06, "loss": 0.466959148645401, "step": 1086 }, { "epoch": 1.173678532901834, "grad_norm": 5.46653938293457, "learning_rate": 1.4868398314607765e-06, "loss": 0.6127966046333313, "step": 1088 }, { "epoch": 1.1758360302049622, "grad_norm": 1.7374179363250732, "learning_rate": 1.484905434357192e-06, "loss": 0.5522704124450684, "step": 1090 }, { "epoch": 1.1779935275080906, "grad_norm": 1.311828374862671, "learning_rate": 1.482968858657255e-06, "loss": 0.4033716320991516, "step": 1092 }, { "epoch": 1.180151024811219, "grad_norm": 1.440038800239563, "learning_rate": 1.481030115322142e-06, "loss": 0.4107467234134674, "step": 1094 }, { "epoch": 1.1823085221143474, "grad_norm": 17.832111358642578, "learning_rate": 1.4790892153253004e-06, "loss": 0.26430749893188477, "step": 1096 }, { "epoch": 1.1844660194174756, "grad_norm": 21.0089054107666, "learning_rate": 1.4771461696523828e-06, "loss": 0.2329411655664444, "step": 1098 }, { "epoch": 1.186623516720604, "grad_norm": 3.482215166091919, "learning_rate": 1.4752009893011877e-06, "loss": 0.33426716923713684, "step": 1100 }, { "epoch": 1.1887810140237325, "grad_norm": 1.4247711896896362, "learning_rate": 1.4732536852815948e-06, "loss": 0.3406693637371063, "step": 1102 }, { "epoch": 1.190938511326861, "grad_norm": 2.5058937072753906, "learning_rate": 1.4713042686155054e-06, "loss": 0.4682016670703888, "step": 1104 }, { "epoch": 1.1930960086299893, "grad_norm": 3.2917213439941406, "learning_rate": 1.469352750336778e-06, "loss": 0.5560429096221924, "step": 1106 }, { "epoch": 1.1952535059331175, "grad_norm": 2.8005712032318115, "learning_rate": 1.4673991414911653e-06, "loss": 0.49286743998527527, "step": 1108 }, { "epoch": 1.197411003236246, "grad_norm": 1.9056379795074463, "learning_rate": 1.465443453136255e-06, "loss": 0.5415875911712646, "step": 1110 }, { "epoch": 1.1995685005393744, "grad_norm": 3.3546078205108643, "learning_rate": 1.4634856963414022e-06, "loss": 0.5321105122566223, "step": 1112 }, { "epoch": 1.2017259978425028, "grad_norm": 1.4719895124435425, "learning_rate": 1.4615258821876726e-06, "loss": 0.4267783761024475, "step": 1114 }, { "epoch": 1.203883495145631, "grad_norm": 1.423250436782837, "learning_rate": 1.459564021767774e-06, "loss": 0.498091459274292, "step": 1116 }, { "epoch": 1.2060409924487594, "grad_norm": 2.6084094047546387, "learning_rate": 1.4576001261859981e-06, "loss": 0.4652736186981201, "step": 1118 }, { "epoch": 1.2081984897518878, "grad_norm": 1.2524727582931519, "learning_rate": 1.4556342065581548e-06, "loss": 0.5334936380386353, "step": 1120 }, { "epoch": 1.2103559870550162, "grad_norm": 4.288187026977539, "learning_rate": 1.453666274011511e-06, "loss": 0.6997748017311096, "step": 1122 }, { "epoch": 1.2125134843581447, "grad_norm": 2.6082146167755127, "learning_rate": 1.4516963396847255e-06, "loss": 0.6567426323890686, "step": 1124 }, { "epoch": 1.2146709816612729, "grad_norm": 1.885820746421814, "learning_rate": 1.4497244147277895e-06, "loss": 0.41897153854370117, "step": 1126 }, { "epoch": 1.2168284789644013, "grad_norm": 3.6321957111358643, "learning_rate": 1.4477505103019587e-06, "loss": 0.4789751172065735, "step": 1128 }, { "epoch": 1.2189859762675297, "grad_norm": 3.317688226699829, "learning_rate": 1.4457746375796956e-06, "loss": 0.551139235496521, "step": 1130 }, { "epoch": 1.2211434735706581, "grad_norm": 1.2082242965698242, "learning_rate": 1.4437968077446013e-06, "loss": 0.3944661617279053, "step": 1132 }, { "epoch": 1.2233009708737863, "grad_norm": 1.8302658796310425, "learning_rate": 1.4418170319913548e-06, "loss": 0.23596011102199554, "step": 1134 }, { "epoch": 1.2254584681769147, "grad_norm": 3.345332622528076, "learning_rate": 1.43983532152565e-06, "loss": 0.20758569240570068, "step": 1136 }, { "epoch": 1.2276159654800431, "grad_norm": 2.142779588699341, "learning_rate": 1.43785168756413e-06, "loss": 0.4067525267601013, "step": 1138 }, { "epoch": 1.2297734627831716, "grad_norm": 14.230850219726562, "learning_rate": 1.4358661413343269e-06, "loss": 0.5197821855545044, "step": 1140 }, { "epoch": 1.2319309600863, "grad_norm": 1.4702306985855103, "learning_rate": 1.4338786940745943e-06, "loss": 0.6153298020362854, "step": 1142 }, { "epoch": 1.2340884573894282, "grad_norm": 3.1709959506988525, "learning_rate": 1.4318893570340476e-06, "loss": 0.47198399901390076, "step": 1144 }, { "epoch": 1.2362459546925566, "grad_norm": 2.0311388969421387, "learning_rate": 1.4298981414724972e-06, "loss": 0.4431988596916199, "step": 1146 }, { "epoch": 1.238403451995685, "grad_norm": 2.6444904804229736, "learning_rate": 1.4279050586603865e-06, "loss": 0.49952733516693115, "step": 1148 }, { "epoch": 1.2405609492988134, "grad_norm": 2.9312846660614014, "learning_rate": 1.4259101198787284e-06, "loss": 0.40768349170684814, "step": 1150 }, { "epoch": 1.2427184466019416, "grad_norm": 1.304535150527954, "learning_rate": 1.4239133364190402e-06, "loss": 0.32800549268722534, "step": 1152 }, { "epoch": 1.24487594390507, "grad_norm": 6.382114887237549, "learning_rate": 1.4219147195832796e-06, "loss": 0.5660591125488281, "step": 1154 }, { "epoch": 1.2470334412081985, "grad_norm": 1.935137152671814, "learning_rate": 1.4199142806837825e-06, "loss": 0.46538597345352173, "step": 1156 }, { "epoch": 1.249190938511327, "grad_norm": 1.4178097248077393, "learning_rate": 1.4179120310431967e-06, "loss": 0.3020792007446289, "step": 1158 }, { "epoch": 1.2513484358144553, "grad_norm": 6.318742752075195, "learning_rate": 1.41590798199442e-06, "loss": 0.5570347309112549, "step": 1160 }, { "epoch": 1.2535059331175837, "grad_norm": 1.248417615890503, "learning_rate": 1.4139021448805344e-06, "loss": 0.3992771506309509, "step": 1162 }, { "epoch": 1.255663430420712, "grad_norm": 5.451845645904541, "learning_rate": 1.4118945310547424e-06, "loss": 0.5283824801445007, "step": 1164 }, { "epoch": 1.2578209277238404, "grad_norm": 2.265537738800049, "learning_rate": 1.4098851518803032e-06, "loss": 0.41607847809791565, "step": 1166 }, { "epoch": 1.2599784250269688, "grad_norm": 0.5451850295066833, "learning_rate": 1.4078740187304678e-06, "loss": 0.44866782426834106, "step": 1168 }, { "epoch": 1.262135922330097, "grad_norm": 6.6960835456848145, "learning_rate": 1.4058611429884153e-06, "loss": 0.6595394015312195, "step": 1170 }, { "epoch": 1.2642934196332254, "grad_norm": 6.947851657867432, "learning_rate": 1.4038465360471872e-06, "loss": 0.6133137345314026, "step": 1172 }, { "epoch": 1.2664509169363538, "grad_norm": 2.5718576908111572, "learning_rate": 1.401830209309624e-06, "loss": 0.4383125901222229, "step": 1174 }, { "epoch": 1.2686084142394822, "grad_norm": 2.443553924560547, "learning_rate": 1.3998121741883012e-06, "loss": 0.38315558433532715, "step": 1176 }, { "epoch": 1.2707659115426106, "grad_norm": 0.8398682475090027, "learning_rate": 1.3977924421054623e-06, "loss": 0.22079361975193024, "step": 1178 }, { "epoch": 1.272923408845739, "grad_norm": 3.0209848880767822, "learning_rate": 1.3957710244929575e-06, "loss": 0.4939245581626892, "step": 1180 }, { "epoch": 1.2750809061488673, "grad_norm": 4.289799213409424, "learning_rate": 1.3937479327921762e-06, "loss": 0.42832162976264954, "step": 1182 }, { "epoch": 1.2772384034519957, "grad_norm": 2.087005376815796, "learning_rate": 1.3917231784539831e-06, "loss": 0.5092071294784546, "step": 1184 }, { "epoch": 1.279395900755124, "grad_norm": 1.6985106468200684, "learning_rate": 1.3896967729386545e-06, "loss": 0.6054165363311768, "step": 1186 }, { "epoch": 1.2815533980582523, "grad_norm": 3.5646963119506836, "learning_rate": 1.3876687277158117e-06, "loss": 0.47859057784080505, "step": 1188 }, { "epoch": 1.2837108953613807, "grad_norm": 3.154890537261963, "learning_rate": 1.385639054264357e-06, "loss": 0.43968018889427185, "step": 1190 }, { "epoch": 1.2858683926645091, "grad_norm": 6.229619026184082, "learning_rate": 1.383607764072409e-06, "loss": 0.5543320775032043, "step": 1192 }, { "epoch": 1.2880258899676376, "grad_norm": 12.460729598999023, "learning_rate": 1.3815748686372368e-06, "loss": 0.4493723511695862, "step": 1194 }, { "epoch": 1.290183387270766, "grad_norm": 1.6863099336624146, "learning_rate": 1.3795403794651955e-06, "loss": 0.3126695156097412, "step": 1196 }, { "epoch": 1.2923408845738944, "grad_norm": 3.3788959980010986, "learning_rate": 1.3775043080716608e-06, "loss": 0.46441030502319336, "step": 1198 }, { "epoch": 1.2944983818770226, "grad_norm": 1.3057730197906494, "learning_rate": 1.3754666659809636e-06, "loss": 0.4863712191581726, "step": 1200 }, { "epoch": 1.296655879180151, "grad_norm": 1.384608507156372, "learning_rate": 1.3734274647263258e-06, "loss": 0.41433578729629517, "step": 1202 }, { "epoch": 1.2988133764832794, "grad_norm": 0.7437410950660706, "learning_rate": 1.3713867158497935e-06, "loss": 0.3361971378326416, "step": 1204 }, { "epoch": 1.3009708737864076, "grad_norm": 5.0748090744018555, "learning_rate": 1.369344430902173e-06, "loss": 0.582435667514801, "step": 1206 }, { "epoch": 1.303128371089536, "grad_norm": 1.445181131362915, "learning_rate": 1.3673006214429657e-06, "loss": 0.49374300241470337, "step": 1208 }, { "epoch": 1.3052858683926645, "grad_norm": 2.7276389598846436, "learning_rate": 1.3652552990402993e-06, "loss": 0.49756351113319397, "step": 1210 }, { "epoch": 1.307443365695793, "grad_norm": 3.362050771713257, "learning_rate": 1.3632084752708672e-06, "loss": 0.4800053536891937, "step": 1212 }, { "epoch": 1.3096008629989213, "grad_norm": 1.3913723230361938, "learning_rate": 1.36116016171986e-06, "loss": 0.569862961769104, "step": 1214 }, { "epoch": 1.3117583603020497, "grad_norm": 0.8088376522064209, "learning_rate": 1.3591103699809009e-06, "loss": 0.43602418899536133, "step": 1216 }, { "epoch": 1.313915857605178, "grad_norm": 2.7153704166412354, "learning_rate": 1.3570591116559786e-06, "loss": 0.627713680267334, "step": 1218 }, { "epoch": 1.3160733549083063, "grad_norm": 2.235117197036743, "learning_rate": 1.3550063983553842e-06, "loss": 0.20072109997272491, "step": 1220 }, { "epoch": 1.3182308522114348, "grad_norm": 2.215144157409668, "learning_rate": 1.352952241697643e-06, "loss": 0.45614534616470337, "step": 1222 }, { "epoch": 1.3203883495145632, "grad_norm": 1.2694110870361328, "learning_rate": 1.3508966533094507e-06, "loss": 0.4190627932548523, "step": 1224 }, { "epoch": 1.3225458468176914, "grad_norm": 1.3221111297607422, "learning_rate": 1.3488396448256063e-06, "loss": 0.41167372465133667, "step": 1226 }, { "epoch": 1.3247033441208198, "grad_norm": 0.3495451509952545, "learning_rate": 1.3467812278889466e-06, "loss": 0.2586868107318878, "step": 1228 }, { "epoch": 1.3268608414239482, "grad_norm": 3.7483558654785156, "learning_rate": 1.3447214141502801e-06, "loss": 0.42079082131385803, "step": 1230 }, { "epoch": 1.3290183387270766, "grad_norm": 1.2944005727767944, "learning_rate": 1.3426602152683221e-06, "loss": 0.4828168451786041, "step": 1232 }, { "epoch": 1.331175836030205, "grad_norm": 2.611660957336426, "learning_rate": 1.3405976429096268e-06, "loss": 0.5353527665138245, "step": 1234 }, { "epoch": 1.3333333333333333, "grad_norm": 3.6428263187408447, "learning_rate": 1.3385337087485237e-06, "loss": 0.28263047337532043, "step": 1236 }, { "epoch": 1.3354908306364617, "grad_norm": 1.8777357339859009, "learning_rate": 1.3364684244670498e-06, "loss": 0.47503718733787537, "step": 1238 }, { "epoch": 1.33764832793959, "grad_norm": 1.553531527519226, "learning_rate": 1.334401801754883e-06, "loss": 0.4773551821708679, "step": 1240 }, { "epoch": 1.3398058252427185, "grad_norm": 26.932111740112305, "learning_rate": 1.3323338523092775e-06, "loss": 0.5582832098007202, "step": 1242 }, { "epoch": 1.3419633225458467, "grad_norm": 5.682314395904541, "learning_rate": 1.3302645878349972e-06, "loss": 0.3482803702354431, "step": 1244 }, { "epoch": 1.3441208198489751, "grad_norm": 1.8704055547714233, "learning_rate": 1.3281940200442492e-06, "loss": 0.5859532952308655, "step": 1246 }, { "epoch": 1.3462783171521036, "grad_norm": 2.0849342346191406, "learning_rate": 1.3261221606566161e-06, "loss": 0.571201503276825, "step": 1248 }, { "epoch": 1.348435814455232, "grad_norm": 1.3928718566894531, "learning_rate": 1.324049021398993e-06, "loss": 0.3548327088356018, "step": 1250 }, { "epoch": 1.3505933117583604, "grad_norm": 1.392311930656433, "learning_rate": 1.3219746140055185e-06, "loss": 0.5696713328361511, "step": 1252 }, { "epoch": 1.3527508090614886, "grad_norm": 1.4951963424682617, "learning_rate": 1.3198989502175077e-06, "loss": 0.34389352798461914, "step": 1254 }, { "epoch": 1.354908306364617, "grad_norm": 2.442704916000366, "learning_rate": 1.3178220417833887e-06, "loss": 0.4191893935203552, "step": 1256 }, { "epoch": 1.3570658036677454, "grad_norm": 2.9892749786376953, "learning_rate": 1.315743900458634e-06, "loss": 0.35198745131492615, "step": 1258 }, { "epoch": 1.3592233009708738, "grad_norm": 2.776257038116455, "learning_rate": 1.313664538005693e-06, "loss": 0.3809160888195038, "step": 1260 }, { "epoch": 1.361380798274002, "grad_norm": 2.1207423210144043, "learning_rate": 1.3115839661939288e-06, "loss": 0.3112916350364685, "step": 1262 }, { "epoch": 1.3635382955771305, "grad_norm": 2.34796404838562, "learning_rate": 1.3095021967995485e-06, "loss": 0.3474862575531006, "step": 1264 }, { "epoch": 1.3656957928802589, "grad_norm": 1.681514024734497, "learning_rate": 1.3074192416055375e-06, "loss": 0.6013367176055908, "step": 1266 }, { "epoch": 1.3678532901833873, "grad_norm": 1.5219907760620117, "learning_rate": 1.3053351124015935e-06, "loss": 0.44022852182388306, "step": 1268 }, { "epoch": 1.3700107874865157, "grad_norm": 10.068926811218262, "learning_rate": 1.3032498209840583e-06, "loss": 0.4306741952896118, "step": 1270 }, { "epoch": 1.3721682847896441, "grad_norm": 3.296771287918091, "learning_rate": 1.3011633791558532e-06, "loss": 0.5527811050415039, "step": 1272 }, { "epoch": 1.3743257820927723, "grad_norm": 136.3231201171875, "learning_rate": 1.2990757987264098e-06, "loss": 0.41877317428588867, "step": 1274 }, { "epoch": 1.3764832793959008, "grad_norm": 2.0969786643981934, "learning_rate": 1.2969870915116042e-06, "loss": 0.578849732875824, "step": 1276 }, { "epoch": 1.3786407766990292, "grad_norm": 4.652449131011963, "learning_rate": 1.2948972693336916e-06, "loss": 0.33083122968673706, "step": 1278 }, { "epoch": 1.3807982740021574, "grad_norm": 1.5804355144500732, "learning_rate": 1.292806344021237e-06, "loss": 0.3789401948451996, "step": 1280 }, { "epoch": 1.3829557713052858, "grad_norm": 1.5308772325515747, "learning_rate": 1.2907143274090487e-06, "loss": 0.5875998735427856, "step": 1282 }, { "epoch": 1.3851132686084142, "grad_norm": 1.4146822690963745, "learning_rate": 1.2886212313381128e-06, "loss": 0.38486555218696594, "step": 1284 }, { "epoch": 1.3872707659115426, "grad_norm": 4.086416244506836, "learning_rate": 1.2865270676555249e-06, "loss": 0.596904456615448, "step": 1286 }, { "epoch": 1.389428263214671, "grad_norm": 3.7820913791656494, "learning_rate": 1.2844318482144233e-06, "loss": 0.43893247842788696, "step": 1288 }, { "epoch": 1.3915857605177995, "grad_norm": 2.7569808959960938, "learning_rate": 1.2823355848739217e-06, "loss": 0.3261288106441498, "step": 1290 }, { "epoch": 1.3937432578209277, "grad_norm": 10.709510803222656, "learning_rate": 1.280238289499043e-06, "loss": 0.5592629909515381, "step": 1292 }, { "epoch": 1.395900755124056, "grad_norm": 3.0105295181274414, "learning_rate": 1.2781399739606513e-06, "loss": 0.5706429481506348, "step": 1294 }, { "epoch": 1.3980582524271845, "grad_norm": 0.3642590045928955, "learning_rate": 1.2760406501353845e-06, "loss": 0.4913448393344879, "step": 1296 }, { "epoch": 1.4002157497303127, "grad_norm": 1.7649108171463013, "learning_rate": 1.273940329905588e-06, "loss": 0.4015069007873535, "step": 1298 }, { "epoch": 1.4023732470334411, "grad_norm": 5.478614807128906, "learning_rate": 1.2718390251592465e-06, "loss": 0.3647070527076721, "step": 1300 }, { "epoch": 1.4045307443365695, "grad_norm": 2.2883858680725098, "learning_rate": 1.2697367477899174e-06, "loss": 0.5743715763092041, "step": 1302 }, { "epoch": 1.406688241639698, "grad_norm": 1.2849724292755127, "learning_rate": 1.2676335096966633e-06, "loss": 0.3841140866279602, "step": 1304 }, { "epoch": 1.4088457389428264, "grad_norm": 1.79099702835083, "learning_rate": 1.2655293227839841e-06, "loss": 0.4001426100730896, "step": 1306 }, { "epoch": 1.4110032362459548, "grad_norm": 2.938184976577759, "learning_rate": 1.2634241989617508e-06, "loss": 0.5245987176895142, "step": 1308 }, { "epoch": 1.413160733549083, "grad_norm": 1.6925368309020996, "learning_rate": 1.2613181501451373e-06, "loss": 0.41294950246810913, "step": 1310 }, { "epoch": 1.4153182308522114, "grad_norm": 1.1948857307434082, "learning_rate": 1.259211188254552e-06, "loss": 0.4697638154029846, "step": 1312 }, { "epoch": 1.4174757281553398, "grad_norm": 3.2041354179382324, "learning_rate": 1.257103325215573e-06, "loss": 0.47677257657051086, "step": 1314 }, { "epoch": 1.419633225458468, "grad_norm": 4.060916423797607, "learning_rate": 1.2549945729588771e-06, "loss": 0.22076305747032166, "step": 1316 }, { "epoch": 1.4217907227615965, "grad_norm": 4.826178073883057, "learning_rate": 1.2528849434201758e-06, "loss": 0.4530554711818695, "step": 1318 }, { "epoch": 1.4239482200647249, "grad_norm": 1.5924415588378906, "learning_rate": 1.2507744485401457e-06, "loss": 0.4310169517993927, "step": 1320 }, { "epoch": 1.4261057173678533, "grad_norm": 1.6999584436416626, "learning_rate": 1.2486631002643604e-06, "loss": 0.32071733474731445, "step": 1322 }, { "epoch": 1.4282632146709817, "grad_norm": 1.5567405223846436, "learning_rate": 1.2465509105432252e-06, "loss": 0.2832459509372711, "step": 1324 }, { "epoch": 1.4304207119741101, "grad_norm": 5.614641189575195, "learning_rate": 1.2444378913319067e-06, "loss": 0.47128552198410034, "step": 1326 }, { "epoch": 1.4325782092772383, "grad_norm": 1.8778231143951416, "learning_rate": 1.2423240545902674e-06, "loss": 0.38101163506507874, "step": 1328 }, { "epoch": 1.4347357065803668, "grad_norm": 3.9056172370910645, "learning_rate": 1.2402094122827964e-06, "loss": 0.537193775177002, "step": 1330 }, { "epoch": 1.4368932038834952, "grad_norm": 3.836848735809326, "learning_rate": 1.2380939763785433e-06, "loss": 0.4837642014026642, "step": 1332 }, { "epoch": 1.4390507011866236, "grad_norm": 1.4066507816314697, "learning_rate": 1.2359777588510484e-06, "loss": 0.5043050646781921, "step": 1334 }, { "epoch": 1.4412081984897518, "grad_norm": 1.7807657718658447, "learning_rate": 1.233860771678277e-06, "loss": 0.42978519201278687, "step": 1336 }, { "epoch": 1.4433656957928802, "grad_norm": 2.4499216079711914, "learning_rate": 1.23174302684255e-06, "loss": 0.5630735754966736, "step": 1338 }, { "epoch": 1.4455231930960086, "grad_norm": 2.219531297683716, "learning_rate": 1.2296245363304772e-06, "loss": 0.6489322185516357, "step": 1340 }, { "epoch": 1.447680690399137, "grad_norm": 3.5208077430725098, "learning_rate": 1.2275053121328886e-06, "loss": 0.424197793006897, "step": 1342 }, { "epoch": 1.4498381877022655, "grad_norm": 2.0139458179473877, "learning_rate": 1.2253853662447673e-06, "loss": 0.51392662525177, "step": 1344 }, { "epoch": 1.4519956850053937, "grad_norm": 3.8764588832855225, "learning_rate": 1.223264710665181e-06, "loss": 0.4180300533771515, "step": 1346 }, { "epoch": 1.454153182308522, "grad_norm": 1.3136292695999146, "learning_rate": 1.2211433573972145e-06, "loss": 0.3597021698951721, "step": 1348 }, { "epoch": 1.4563106796116505, "grad_norm": 2.6723670959472656, "learning_rate": 1.219021318447901e-06, "loss": 0.4391145408153534, "step": 1350 }, { "epoch": 1.458468176914779, "grad_norm": 2.19071102142334, "learning_rate": 1.2168986058281552e-06, "loss": 0.31397783756256104, "step": 1352 }, { "epoch": 1.4606256742179071, "grad_norm": 2.55515718460083, "learning_rate": 1.2147752315527056e-06, "loss": 0.49626126885414124, "step": 1354 }, { "epoch": 1.4627831715210355, "grad_norm": 1.1953641176223755, "learning_rate": 1.2126512076400238e-06, "loss": 0.36800915002822876, "step": 1356 }, { "epoch": 1.464940668824164, "grad_norm": 1.1821345090866089, "learning_rate": 1.2105265461122599e-06, "loss": 0.36970698833465576, "step": 1358 }, { "epoch": 1.4670981661272924, "grad_norm": 1.9997817277908325, "learning_rate": 1.208401258995173e-06, "loss": 0.24953503906726837, "step": 1360 }, { "epoch": 1.4692556634304208, "grad_norm": 1.4127711057662964, "learning_rate": 1.2062753583180617e-06, "loss": 0.6299887895584106, "step": 1362 }, { "epoch": 1.4714131607335492, "grad_norm": 1.239811897277832, "learning_rate": 1.2041488561136987e-06, "loss": 0.2647631764411926, "step": 1364 }, { "epoch": 1.4735706580366774, "grad_norm": 1.8730353116989136, "learning_rate": 1.2020217644182618e-06, "loss": 0.47313305735588074, "step": 1366 }, { "epoch": 1.4757281553398058, "grad_norm": 1.2671191692352295, "learning_rate": 1.1998940952712636e-06, "loss": 0.4221327602863312, "step": 1368 }, { "epoch": 1.4778856526429343, "grad_norm": 3.7683935165405273, "learning_rate": 1.1977658607154866e-06, "loss": 0.3292485773563385, "step": 1370 }, { "epoch": 1.4800431499460625, "grad_norm": 2.1078288555145264, "learning_rate": 1.1956370727969132e-06, "loss": 0.4748386740684509, "step": 1372 }, { "epoch": 1.4822006472491909, "grad_norm": 3.0718023777008057, "learning_rate": 1.1935077435646573e-06, "loss": 0.41127315163612366, "step": 1374 }, { "epoch": 1.4843581445523193, "grad_norm": 9.03038501739502, "learning_rate": 1.1913778850708974e-06, "loss": 0.38048920035362244, "step": 1376 }, { "epoch": 1.4865156418554477, "grad_norm": 2.5646114349365234, "learning_rate": 1.189247509370807e-06, "loss": 0.5044585466384888, "step": 1378 }, { "epoch": 1.4886731391585761, "grad_norm": 3.9271023273468018, "learning_rate": 1.1871166285224885e-06, "loss": 0.5840790271759033, "step": 1380 }, { "epoch": 1.4908306364617046, "grad_norm": 1.9364007711410522, "learning_rate": 1.1849852545869013e-06, "loss": 0.4913451671600342, "step": 1382 }, { "epoch": 1.4929881337648327, "grad_norm": 7.079308986663818, "learning_rate": 1.182853399627797e-06, "loss": 0.40108633041381836, "step": 1384 }, { "epoch": 1.4951456310679612, "grad_norm": 1.5303609371185303, "learning_rate": 1.1807210757116505e-06, "loss": 0.5875151753425598, "step": 1386 }, { "epoch": 1.4973031283710896, "grad_norm": 1.7939358949661255, "learning_rate": 1.1785882949075894e-06, "loss": 0.43406108021736145, "step": 1388 }, { "epoch": 1.4994606256742178, "grad_norm": 1.709847092628479, "learning_rate": 1.1764550692873282e-06, "loss": 0.4609090983867645, "step": 1390 }, { "epoch": 1.5016181229773462, "grad_norm": 1.4324554204940796, "learning_rate": 1.1743214109250992e-06, "loss": 0.2564505934715271, "step": 1392 }, { "epoch": 1.5037756202804746, "grad_norm": 2.9874749183654785, "learning_rate": 1.1721873318975835e-06, "loss": 0.46675199270248413, "step": 1394 }, { "epoch": 1.505933117583603, "grad_norm": 3.750638008117676, "learning_rate": 1.1700528442838442e-06, "loss": 0.5055999755859375, "step": 1396 }, { "epoch": 1.5080906148867315, "grad_norm": 2.8105647563934326, "learning_rate": 1.167917960165256e-06, "loss": 0.5268608331680298, "step": 1398 }, { "epoch": 1.5102481121898599, "grad_norm": 3.50753116607666, "learning_rate": 1.1657826916254382e-06, "loss": 0.5102010369300842, "step": 1400 }, { "epoch": 1.512405609492988, "grad_norm": 10.553208351135254, "learning_rate": 1.1636470507501863e-06, "loss": 0.4071239233016968, "step": 1402 }, { "epoch": 1.5145631067961165, "grad_norm": 3.4755797386169434, "learning_rate": 1.1615110496274028e-06, "loss": 0.3140917420387268, "step": 1404 }, { "epoch": 1.516720604099245, "grad_norm": 2.3255038261413574, "learning_rate": 1.1593747003470294e-06, "loss": 0.49230116605758667, "step": 1406 }, { "epoch": 1.5188781014023731, "grad_norm": 1.2084012031555176, "learning_rate": 1.1572380150009777e-06, "loss": 0.39797013998031616, "step": 1408 }, { "epoch": 1.5210355987055015, "grad_norm": 5.355250358581543, "learning_rate": 1.1551010056830634e-06, "loss": 0.36559203267097473, "step": 1410 }, { "epoch": 1.52319309600863, "grad_norm": 1.0859466791152954, "learning_rate": 1.152963684488934e-06, "loss": 0.20361725986003876, "step": 1412 }, { "epoch": 1.5253505933117584, "grad_norm": 3.301490306854248, "learning_rate": 1.150826063516003e-06, "loss": 0.36109161376953125, "step": 1414 }, { "epoch": 1.5275080906148868, "grad_norm": 2.034646511077881, "learning_rate": 1.1486881548633802e-06, "loss": 0.4435052275657654, "step": 1416 }, { "epoch": 1.5296655879180152, "grad_norm": 1.7059470415115356, "learning_rate": 1.1465499706318048e-06, "loss": 0.4154685437679291, "step": 1418 }, { "epoch": 1.5318230852211436, "grad_norm": 1.5160272121429443, "learning_rate": 1.1444115229235745e-06, "loss": 0.37496164441108704, "step": 1420 }, { "epoch": 1.5339805825242718, "grad_norm": 2.6280198097229004, "learning_rate": 1.1422728238424785e-06, "loss": 0.48741182684898376, "step": 1422 }, { "epoch": 1.5361380798274002, "grad_norm": 14.306265830993652, "learning_rate": 1.14013388549373e-06, "loss": 0.5213165879249573, "step": 1424 }, { "epoch": 1.5382955771305284, "grad_norm": 1.211489200592041, "learning_rate": 1.1379947199838952e-06, "loss": 0.345187783241272, "step": 1426 }, { "epoch": 1.5404530744336569, "grad_norm": 3.2337164878845215, "learning_rate": 1.1358553394208268e-06, "loss": 0.5196102857589722, "step": 1428 }, { "epoch": 1.5426105717367853, "grad_norm": 1.5404866933822632, "learning_rate": 1.1337157559135942e-06, "loss": 0.4148750603199005, "step": 1430 }, { "epoch": 1.5447680690399137, "grad_norm": 1.683718204498291, "learning_rate": 1.1315759815724152e-06, "loss": 0.32485026121139526, "step": 1432 }, { "epoch": 1.5469255663430421, "grad_norm": 2.2065541744232178, "learning_rate": 1.1294360285085888e-06, "loss": 0.2961767017841339, "step": 1434 }, { "epoch": 1.5490830636461705, "grad_norm": 1.0630570650100708, "learning_rate": 1.1272959088344253e-06, "loss": 0.37115591764450073, "step": 1436 }, { "epoch": 1.551240560949299, "grad_norm": 2.599900245666504, "learning_rate": 1.1251556346631762e-06, "loss": 0.5358873605728149, "step": 1438 }, { "epoch": 1.5533980582524272, "grad_norm": 1.2480677366256714, "learning_rate": 1.1230152181089708e-06, "loss": 0.46197211742401123, "step": 1440 }, { "epoch": 1.5555555555555556, "grad_norm": 2.2794196605682373, "learning_rate": 1.1208746712867419e-06, "loss": 0.44740840792655945, "step": 1442 }, { "epoch": 1.5577130528586838, "grad_norm": 1.7489802837371826, "learning_rate": 1.1187340063121593e-06, "loss": 0.4339655339717865, "step": 1444 }, { "epoch": 1.5598705501618122, "grad_norm": 3.410910129547119, "learning_rate": 1.116593235301564e-06, "loss": 0.3300541639328003, "step": 1446 }, { "epoch": 1.5620280474649406, "grad_norm": 0.5800649523735046, "learning_rate": 1.1144523703718942e-06, "loss": 0.5032283663749695, "step": 1448 }, { "epoch": 1.564185544768069, "grad_norm": 1.7073270082473755, "learning_rate": 1.1123114236406224e-06, "loss": 0.4437793791294098, "step": 1450 }, { "epoch": 1.5663430420711975, "grad_norm": 1.9129263162612915, "learning_rate": 1.1101704072256819e-06, "loss": 0.49655881524086, "step": 1452 }, { "epoch": 1.5685005393743259, "grad_norm": 3.6259055137634277, "learning_rate": 1.1080293332454016e-06, "loss": 0.331562340259552, "step": 1454 }, { "epoch": 1.5706580366774543, "grad_norm": 1.8879085779190063, "learning_rate": 1.1058882138184363e-06, "loss": 0.5420922040939331, "step": 1456 }, { "epoch": 1.5728155339805825, "grad_norm": 1.6056373119354248, "learning_rate": 1.103747061063697e-06, "loss": 0.2305726557970047, "step": 1458 }, { "epoch": 1.574973031283711, "grad_norm": 2.3105075359344482, "learning_rate": 1.101605887100285e-06, "loss": 0.4295492470264435, "step": 1460 }, { "epoch": 1.577130528586839, "grad_norm": 3.3066842555999756, "learning_rate": 1.09946470404742e-06, "loss": 0.5346636772155762, "step": 1462 }, { "epoch": 1.5792880258899675, "grad_norm": 5.481215476989746, "learning_rate": 1.097323524024374e-06, "loss": 0.669352114200592, "step": 1464 }, { "epoch": 1.581445523193096, "grad_norm": 5.5241851806640625, "learning_rate": 1.095182359150402e-06, "loss": 0.5989066958427429, "step": 1466 }, { "epoch": 1.5836030204962244, "grad_norm": 1.298604130744934, "learning_rate": 1.0930412215446723e-06, "loss": 0.3661651015281677, "step": 1468 }, { "epoch": 1.5857605177993528, "grad_norm": 4.695067405700684, "learning_rate": 1.0909001233262001e-06, "loss": 0.449363648891449, "step": 1470 }, { "epoch": 1.5879180151024812, "grad_norm": 2.782097578048706, "learning_rate": 1.0887590766137766e-06, "loss": 0.5595487356185913, "step": 1472 }, { "epoch": 1.5900755124056096, "grad_norm": 1.2103036642074585, "learning_rate": 1.0866180935259022e-06, "loss": 0.38902321457862854, "step": 1474 }, { "epoch": 1.5922330097087378, "grad_norm": 1.6246592998504639, "learning_rate": 1.084477186180717e-06, "loss": 0.5024740099906921, "step": 1476 }, { "epoch": 1.5943905070118662, "grad_norm": 1.3438127040863037, "learning_rate": 1.0823363666959322e-06, "loss": 0.47724461555480957, "step": 1478 }, { "epoch": 1.5965480043149944, "grad_norm": 1.5329099893569946, "learning_rate": 1.0801956471887618e-06, "loss": 0.43613773584365845, "step": 1480 }, { "epoch": 1.5987055016181229, "grad_norm": 2.6041982173919678, "learning_rate": 1.078055039775854e-06, "loss": 0.5445818305015564, "step": 1482 }, { "epoch": 1.6008629989212513, "grad_norm": 3.287353277206421, "learning_rate": 1.075914556573222e-06, "loss": 0.35657113790512085, "step": 1484 }, { "epoch": 1.6030204962243797, "grad_norm": 6.16733455657959, "learning_rate": 1.0737742096961774e-06, "loss": 0.5397022366523743, "step": 1486 }, { "epoch": 1.6051779935275081, "grad_norm": 1.3404687643051147, "learning_rate": 1.0716340112592582e-06, "loss": 0.40695685148239136, "step": 1488 }, { "epoch": 1.6073354908306365, "grad_norm": 4.531323432922363, "learning_rate": 1.0694939733761635e-06, "loss": 0.43187639117240906, "step": 1490 }, { "epoch": 1.609492988133765, "grad_norm": 4.229406833648682, "learning_rate": 1.067354108159684e-06, "loss": 0.3659261465072632, "step": 1492 }, { "epoch": 1.6116504854368932, "grad_norm": 1.4188188314437866, "learning_rate": 1.0652144277216315e-06, "loss": 0.5332222580909729, "step": 1494 }, { "epoch": 1.6138079827400216, "grad_norm": 2.903252363204956, "learning_rate": 1.063074944172774e-06, "loss": 0.4275670647621155, "step": 1496 }, { "epoch": 1.61596548004315, "grad_norm": 1.9704622030258179, "learning_rate": 1.060935669622763e-06, "loss": 0.5114681720733643, "step": 1498 }, { "epoch": 1.6181229773462782, "grad_norm": 1.4989230632781982, "learning_rate": 1.0587966161800688e-06, "loss": 0.4305647909641266, "step": 1500 }, { "epoch": 1.6202804746494066, "grad_norm": 4.043560981750488, "learning_rate": 1.0566577959519086e-06, "loss": 0.34898895025253296, "step": 1502 }, { "epoch": 1.622437971952535, "grad_norm": 3.2984836101531982, "learning_rate": 1.0545192210441814e-06, "loss": 0.3457680642604828, "step": 1504 }, { "epoch": 1.6245954692556634, "grad_norm": 2.0970866680145264, "learning_rate": 1.0523809035613964e-06, "loss": 0.45543625950813293, "step": 1506 }, { "epoch": 1.6267529665587919, "grad_norm": 4.432509422302246, "learning_rate": 1.0502428556066059e-06, "loss": 0.33377963304519653, "step": 1508 }, { "epoch": 1.6289104638619203, "grad_norm": 1.2249876260757446, "learning_rate": 1.0481050892813368e-06, "loss": 0.3518203794956207, "step": 1510 }, { "epoch": 1.6310679611650487, "grad_norm": 3.6273698806762695, "learning_rate": 1.0459676166855223e-06, "loss": 0.47581151127815247, "step": 1512 }, { "epoch": 1.633225458468177, "grad_norm": 1.7668628692626953, "learning_rate": 1.0438304499174325e-06, "loss": 0.31876808404922485, "step": 1514 }, { "epoch": 1.6353829557713053, "grad_norm": 4.061316013336182, "learning_rate": 1.0416936010736064e-06, "loss": 0.47807684540748596, "step": 1516 }, { "epoch": 1.6375404530744335, "grad_norm": 1.9564175605773926, "learning_rate": 1.0395570822487845e-06, "loss": 0.47794413566589355, "step": 1518 }, { "epoch": 1.639697950377562, "grad_norm": 1.4420032501220703, "learning_rate": 1.0374209055358385e-06, "loss": 0.6091484427452087, "step": 1520 }, { "epoch": 1.6418554476806904, "grad_norm": 2.4212918281555176, "learning_rate": 1.0352850830257037e-06, "loss": 0.3609981834888458, "step": 1522 }, { "epoch": 1.6440129449838188, "grad_norm": 17.685544967651367, "learning_rate": 1.0331496268073113e-06, "loss": 0.3519137501716614, "step": 1524 }, { "epoch": 1.6461704422869472, "grad_norm": 2.5702126026153564, "learning_rate": 1.031014548967518e-06, "loss": 0.4019058346748352, "step": 1526 }, { "epoch": 1.6483279395900756, "grad_norm": 1.5977301597595215, "learning_rate": 1.0288798615910409e-06, "loss": 0.4482097923755646, "step": 1528 }, { "epoch": 1.650485436893204, "grad_norm": 3.8261749744415283, "learning_rate": 1.0267455767603842e-06, "loss": 0.5603641867637634, "step": 1530 }, { "epoch": 1.6526429341963322, "grad_norm": 2.4676754474639893, "learning_rate": 1.0246117065557762e-06, "loss": 0.6466296315193176, "step": 1532 }, { "epoch": 1.6548004314994607, "grad_norm": 9.348182678222656, "learning_rate": 1.0224782630550976e-06, "loss": 0.4512023627758026, "step": 1534 }, { "epoch": 1.6569579288025889, "grad_norm": 1.4680399894714355, "learning_rate": 1.020345258333813e-06, "loss": 0.3725220561027527, "step": 1536 }, { "epoch": 1.6591154261057173, "grad_norm": 1.6723597049713135, "learning_rate": 1.0182127044649052e-06, "loss": 0.5063510537147522, "step": 1538 }, { "epoch": 1.6612729234088457, "grad_norm": 1.5339092016220093, "learning_rate": 1.0160806135188028e-06, "loss": 0.46868813037872314, "step": 1540 }, { "epoch": 1.6634304207119741, "grad_norm": 1.4481370449066162, "learning_rate": 1.0139489975633166e-06, "loss": 0.44415712356567383, "step": 1542 }, { "epoch": 1.6655879180151025, "grad_norm": 4.979800701141357, "learning_rate": 1.0118178686635677e-06, "loss": 0.3348858952522278, "step": 1544 }, { "epoch": 1.667745415318231, "grad_norm": 2.358186721801758, "learning_rate": 1.00968723888192e-06, "loss": 0.42780208587646484, "step": 1546 }, { "epoch": 1.6699029126213594, "grad_norm": 1.201817512512207, "learning_rate": 1.0075571202779138e-06, "loss": 0.46995261311531067, "step": 1548 }, { "epoch": 1.6720604099244876, "grad_norm": 3.709390878677368, "learning_rate": 1.0054275249081947e-06, "loss": 0.35104840993881226, "step": 1550 }, { "epoch": 1.674217907227616, "grad_norm": 1.4292689561843872, "learning_rate": 1.0032984648264479e-06, "loss": 0.4314435124397278, "step": 1552 }, { "epoch": 1.6763754045307442, "grad_norm": 1.3379240036010742, "learning_rate": 1.0011699520833272e-06, "loss": 0.4032558798789978, "step": 1554 }, { "epoch": 1.6785329018338726, "grad_norm": 7.688792705535889, "learning_rate": 9.990419987263904e-07, "loss": 0.4385361671447754, "step": 1556 }, { "epoch": 1.680690399137001, "grad_norm": 3.1750102043151855, "learning_rate": 9.969146168000277e-07, "loss": 0.31719791889190674, "step": 1558 }, { "epoch": 1.6828478964401294, "grad_norm": 1.8544740676879883, "learning_rate": 9.947878183453955e-07, "loss": 0.5202147364616394, "step": 1560 }, { "epoch": 1.6850053937432579, "grad_norm": 1.8201504945755005, "learning_rate": 9.926616154003478e-07, "loss": 0.34038931131362915, "step": 1562 }, { "epoch": 1.6871628910463863, "grad_norm": 1.8023303747177124, "learning_rate": 9.905360199993674e-07, "loss": 0.3473019599914551, "step": 1564 }, { "epoch": 1.6893203883495147, "grad_norm": 2.248263120651245, "learning_rate": 9.884110441734992e-07, "loss": 0.49435266852378845, "step": 1566 }, { "epoch": 1.691477885652643, "grad_norm": 2.6698451042175293, "learning_rate": 9.862866999502805e-07, "loss": 0.4461665451526642, "step": 1568 }, { "epoch": 1.6936353829557713, "grad_norm": 1.4548275470733643, "learning_rate": 9.841629993536741e-07, "loss": 0.5574808120727539, "step": 1570 }, { "epoch": 1.6957928802588995, "grad_norm": 1.4881387948989868, "learning_rate": 9.820399544039997e-07, "loss": 0.3747144043445587, "step": 1572 }, { "epoch": 1.697950377562028, "grad_norm": 43.702919006347656, "learning_rate": 9.799175771178662e-07, "loss": 0.543049693107605, "step": 1574 }, { "epoch": 1.7001078748651564, "grad_norm": 3.537771463394165, "learning_rate": 9.777958795081024e-07, "loss": 0.38331982493400574, "step": 1576 }, { "epoch": 1.7022653721682848, "grad_norm": 1.9265162944793701, "learning_rate": 9.75674873583692e-07, "loss": 0.3932670056819916, "step": 1578 }, { "epoch": 1.7044228694714132, "grad_norm": 1.3593825101852417, "learning_rate": 9.735545713497021e-07, "loss": 0.4138597249984741, "step": 1580 }, { "epoch": 1.7065803667745416, "grad_norm": 2.415477991104126, "learning_rate": 9.714349848072175e-07, "loss": 0.4992269277572632, "step": 1582 }, { "epoch": 1.70873786407767, "grad_norm": 1.0291266441345215, "learning_rate": 9.693161259532722e-07, "loss": 0.4245167076587677, "step": 1584 }, { "epoch": 1.7108953613807982, "grad_norm": 1.4725301265716553, "learning_rate": 9.671980067807806e-07, "loss": 0.35596776008605957, "step": 1586 }, { "epoch": 1.7130528586839266, "grad_norm": 1.3389267921447754, "learning_rate": 9.650806392784719e-07, "loss": 0.3590199947357178, "step": 1588 }, { "epoch": 1.715210355987055, "grad_norm": 1.9211981296539307, "learning_rate": 9.629640354308188e-07, "loss": 0.5305579900741577, "step": 1590 }, { "epoch": 1.7173678532901833, "grad_norm": 1.3896666765213013, "learning_rate": 9.60848207217974e-07, "loss": 0.3872862458229065, "step": 1592 }, { "epoch": 1.7195253505933117, "grad_norm": 1.2243990898132324, "learning_rate": 9.587331666156988e-07, "loss": 0.5288591384887695, "step": 1594 }, { "epoch": 1.72168284789644, "grad_norm": 1.8954887390136719, "learning_rate": 9.566189255952956e-07, "loss": 0.43896806240081787, "step": 1596 }, { "epoch": 1.7238403451995685, "grad_norm": 1.1927108764648438, "learning_rate": 9.545054961235435e-07, "loss": 0.4235879182815552, "step": 1598 }, { "epoch": 1.725997842502697, "grad_norm": 7.993542194366455, "learning_rate": 9.523928901626255e-07, "loss": 0.35887616872787476, "step": 1600 }, { "epoch": 1.7281553398058254, "grad_norm": 2.1313676834106445, "learning_rate": 9.502811196700656e-07, "loss": 0.46110397577285767, "step": 1602 }, { "epoch": 1.7303128371089536, "grad_norm": 1.278878092765808, "learning_rate": 9.481701965986574e-07, "loss": 0.3147183656692505, "step": 1604 }, { "epoch": 1.732470334412082, "grad_norm": 3.170421838760376, "learning_rate": 9.460601328963996e-07, "loss": 0.24724824726581573, "step": 1606 }, { "epoch": 1.7346278317152104, "grad_norm": 1.7401503324508667, "learning_rate": 9.439509405064254e-07, "loss": 0.41423508524894714, "step": 1608 }, { "epoch": 1.7367853290183386, "grad_norm": 1.8899052143096924, "learning_rate": 9.41842631366937e-07, "loss": 0.5291723608970642, "step": 1610 }, { "epoch": 1.738942826321467, "grad_norm": 2.190075635910034, "learning_rate": 9.397352174111372e-07, "loss": 0.49489831924438477, "step": 1612 }, { "epoch": 1.7411003236245954, "grad_norm": 4.175290584564209, "learning_rate": 9.376287105671621e-07, "loss": 0.2998746633529663, "step": 1614 }, { "epoch": 1.7432578209277239, "grad_norm": 1.679629921913147, "learning_rate": 9.355231227580132e-07, "loss": 0.4566305875778198, "step": 1616 }, { "epoch": 1.7454153182308523, "grad_norm": 1.7757675647735596, "learning_rate": 9.334184659014901e-07, "loss": 0.36898234486579895, "step": 1618 }, { "epoch": 1.7475728155339807, "grad_norm": 24.852197647094727, "learning_rate": 9.313147519101237e-07, "loss": 0.2811485826969147, "step": 1620 }, { "epoch": 1.7497303128371091, "grad_norm": 1.6118603944778442, "learning_rate": 9.292119926911078e-07, "loss": 0.2936355173587799, "step": 1622 }, { "epoch": 1.7518878101402373, "grad_norm": 1.2674829959869385, "learning_rate": 9.271102001462321e-07, "loss": 0.3665968179702759, "step": 1624 }, { "epoch": 1.7540453074433657, "grad_norm": 2.609710216522217, "learning_rate": 9.250093861718151e-07, "loss": 0.38114845752716064, "step": 1626 }, { "epoch": 1.756202804746494, "grad_norm": 2.0557167530059814, "learning_rate": 9.229095626586362e-07, "loss": 0.4779360294342041, "step": 1628 }, { "epoch": 1.7583603020496223, "grad_norm": 2.9698874950408936, "learning_rate": 9.208107414918691e-07, "loss": 0.5487996935844421, "step": 1630 }, { "epoch": 1.7605177993527508, "grad_norm": 1.6979955434799194, "learning_rate": 9.187129345510134e-07, "loss": 0.5224738121032715, "step": 1632 }, { "epoch": 1.7626752966558792, "grad_norm": 2.131030321121216, "learning_rate": 9.166161537098287e-07, "loss": 0.33794957399368286, "step": 1634 }, { "epoch": 1.7648327939590076, "grad_norm": 1.3157271146774292, "learning_rate": 9.145204108362672e-07, "loss": 0.49309784173965454, "step": 1636 }, { "epoch": 1.766990291262136, "grad_norm": 1.6136844158172607, "learning_rate": 9.124257177924049e-07, "loss": 0.5821846723556519, "step": 1638 }, { "epoch": 1.7691477885652644, "grad_norm": 1.258776068687439, "learning_rate": 9.10332086434377e-07, "loss": 0.46728694438934326, "step": 1640 }, { "epoch": 1.7713052858683926, "grad_norm": 1.5475536584854126, "learning_rate": 9.082395286123081e-07, "loss": 0.4196864068508148, "step": 1642 }, { "epoch": 1.773462783171521, "grad_norm": 3.12204909324646, "learning_rate": 9.061480561702482e-07, "loss": 0.42648231983184814, "step": 1644 }, { "epoch": 1.7756202804746493, "grad_norm": 4.430125713348389, "learning_rate": 9.040576809461016e-07, "loss": 0.5809032917022705, "step": 1646 }, { "epoch": 1.7777777777777777, "grad_norm": 1.5230090618133545, "learning_rate": 9.019684147715649e-07, "loss": 0.4213182330131531, "step": 1648 }, { "epoch": 1.779935275080906, "grad_norm": 2.2308318614959717, "learning_rate": 8.99880269472056e-07, "loss": 0.2347421497106552, "step": 1650 }, { "epoch": 1.7820927723840345, "grad_norm": 1.6177752017974854, "learning_rate": 8.97793256866648e-07, "loss": 0.4257172644138336, "step": 1652 }, { "epoch": 1.784250269687163, "grad_norm": 2.0257010459899902, "learning_rate": 8.957073887680046e-07, "loss": 0.3010298013687134, "step": 1654 }, { "epoch": 1.7864077669902914, "grad_norm": 2.075418472290039, "learning_rate": 8.936226769823094e-07, "loss": 0.5388916730880737, "step": 1656 }, { "epoch": 1.7885652642934198, "grad_norm": 1.9110989570617676, "learning_rate": 8.915391333092028e-07, "loss": 0.40239423513412476, "step": 1658 }, { "epoch": 1.790722761596548, "grad_norm": 1.4136828184127808, "learning_rate": 8.894567695417128e-07, "loss": 0.44491565227508545, "step": 1660 }, { "epoch": 1.7928802588996764, "grad_norm": 1.7407686710357666, "learning_rate": 8.873755974661894e-07, "loss": 0.4648374021053314, "step": 1662 }, { "epoch": 1.7950377562028046, "grad_norm": 1.3232940435409546, "learning_rate": 8.852956288622373e-07, "loss": 0.4256327450275421, "step": 1664 }, { "epoch": 1.797195253505933, "grad_norm": 2.126704692840576, "learning_rate": 8.832168755026495e-07, "loss": 0.1840769350528717, "step": 1666 }, { "epoch": 1.7993527508090614, "grad_norm": 1.6251252889633179, "learning_rate": 8.81139349153341e-07, "loss": 0.4822881519794464, "step": 1668 }, { "epoch": 1.8015102481121898, "grad_norm": 1.9323124885559082, "learning_rate": 8.790630615732808e-07, "loss": 0.4157404899597168, "step": 1670 }, { "epoch": 1.8036677454153183, "grad_norm": 4.677561283111572, "learning_rate": 8.769880245144277e-07, "loss": 0.3802054226398468, "step": 1672 }, { "epoch": 1.8058252427184467, "grad_norm": 0.5432685613632202, "learning_rate": 8.749142497216613e-07, "loss": 0.22273704409599304, "step": 1674 }, { "epoch": 1.807982740021575, "grad_norm": 3.5203936100006104, "learning_rate": 8.728417489327174e-07, "loss": 0.546721339225769, "step": 1676 }, { "epoch": 1.8101402373247033, "grad_norm": 1.5164800882339478, "learning_rate": 8.707705338781202e-07, "loss": 0.5539653897285461, "step": 1678 }, { "epoch": 1.8122977346278317, "grad_norm": 4.041696071624756, "learning_rate": 8.687006162811175e-07, "loss": 0.48323866724967957, "step": 1680 }, { "epoch": 1.81445523193096, "grad_norm": 2.8498449325561523, "learning_rate": 8.666320078576125e-07, "loss": 0.37030312418937683, "step": 1682 }, { "epoch": 1.8166127292340883, "grad_norm": 2.4165847301483154, "learning_rate": 8.645647203160988e-07, "loss": 0.535261869430542, "step": 1684 }, { "epoch": 1.8187702265372168, "grad_norm": 1.3950622081756592, "learning_rate": 8.624987653575935e-07, "loss": 0.09442806243896484, "step": 1686 }, { "epoch": 1.8209277238403452, "grad_norm": 3.6123199462890625, "learning_rate": 8.604341546755711e-07, "loss": 0.4735386073589325, "step": 1688 }, { "epoch": 1.8230852211434736, "grad_norm": 1.8474417924880981, "learning_rate": 8.583708999558981e-07, "loss": 0.42983824014663696, "step": 1690 }, { "epoch": 1.825242718446602, "grad_norm": 4.4611406326293945, "learning_rate": 8.563090128767643e-07, "loss": 0.4846471846103668, "step": 1692 }, { "epoch": 1.8274002157497304, "grad_norm": 4.02655553817749, "learning_rate": 8.54248505108621e-07, "loss": 0.4285997152328491, "step": 1694 }, { "epoch": 1.8295577130528586, "grad_norm": 1.1664454936981201, "learning_rate": 8.521893883141114e-07, "loss": 0.3732617199420929, "step": 1696 }, { "epoch": 1.831715210355987, "grad_norm": 2.430764675140381, "learning_rate": 8.501316741480044e-07, "loss": 0.5520771741867065, "step": 1698 }, { "epoch": 1.8338727076591155, "grad_norm": 1.7392953634262085, "learning_rate": 8.480753742571325e-07, "loss": 0.4468059241771698, "step": 1700 }, { "epoch": 1.8360302049622437, "grad_norm": 1.8151521682739258, "learning_rate": 8.460205002803206e-07, "loss": 0.623181939125061, "step": 1702 }, { "epoch": 1.838187702265372, "grad_norm": 1.6103137731552124, "learning_rate": 8.439670638483254e-07, "loss": 0.47068604826927185, "step": 1704 }, { "epoch": 1.8403451995685005, "grad_norm": 1.699935793876648, "learning_rate": 8.419150765837644e-07, "loss": 0.461783230304718, "step": 1706 }, { "epoch": 1.842502696871629, "grad_norm": 1.5268728733062744, "learning_rate": 8.398645501010544e-07, "loss": 0.4249412715435028, "step": 1708 }, { "epoch": 1.8446601941747574, "grad_norm": 1.973617434501648, "learning_rate": 8.378154960063439e-07, "loss": 0.3225463628768921, "step": 1710 }, { "epoch": 1.8468176914778858, "grad_norm": 2.0861403942108154, "learning_rate": 8.357679258974471e-07, "loss": 0.41262945532798767, "step": 1712 }, { "epoch": 1.8489751887810142, "grad_norm": 1.1829684972763062, "learning_rate": 8.33721851363779e-07, "loss": 0.3057762086391449, "step": 1714 }, { "epoch": 1.8511326860841424, "grad_norm": 2.940964937210083, "learning_rate": 8.316772839862889e-07, "loss": 0.49465298652648926, "step": 1716 }, { "epoch": 1.8532901833872708, "grad_norm": 2.7260243892669678, "learning_rate": 8.296342353373964e-07, "loss": 0.3695753216743469, "step": 1718 }, { "epoch": 1.855447680690399, "grad_norm": 1.9509867429733276, "learning_rate": 8.275927169809245e-07, "loss": 0.33289045095443726, "step": 1720 }, { "epoch": 1.8576051779935274, "grad_norm": 1.32254159450531, "learning_rate": 8.255527404720346e-07, "loss": 0.48791223764419556, "step": 1722 }, { "epoch": 1.8597626752966558, "grad_norm": 1.0751956701278687, "learning_rate": 8.235143173571615e-07, "loss": 0.4154895544052124, "step": 1724 }, { "epoch": 1.8619201725997843, "grad_norm": 2.691671371459961, "learning_rate": 8.214774591739469e-07, "loss": 0.4291550815105438, "step": 1726 }, { "epoch": 1.8640776699029127, "grad_norm": 2.053277015686035, "learning_rate": 8.194421774511757e-07, "loss": 0.19994314014911652, "step": 1728 }, { "epoch": 1.866235167206041, "grad_norm": 0.4679964482784271, "learning_rate": 8.174084837087091e-07, "loss": 0.25225332379341125, "step": 1730 }, { "epoch": 1.8683926645091695, "grad_norm": 8.203169822692871, "learning_rate": 8.15376389457421e-07, "loss": 0.4160417914390564, "step": 1732 }, { "epoch": 1.8705501618122977, "grad_norm": 2.1094777584075928, "learning_rate": 8.133459061991312e-07, "loss": 0.3063911199569702, "step": 1734 }, { "epoch": 1.8727076591154261, "grad_norm": 5.972533702850342, "learning_rate": 8.113170454265421e-07, "loss": 0.48280882835388184, "step": 1736 }, { "epoch": 1.8748651564185543, "grad_norm": 1.7371788024902344, "learning_rate": 8.092898186231722e-07, "loss": 0.5959540605545044, "step": 1738 }, { "epoch": 1.8770226537216828, "grad_norm": 3.3627707958221436, "learning_rate": 8.072642372632914e-07, "loss": 0.4884318709373474, "step": 1740 }, { "epoch": 1.8791801510248112, "grad_norm": 2.1984074115753174, "learning_rate": 8.052403128118564e-07, "loss": 0.6091974377632141, "step": 1742 }, { "epoch": 1.8813376483279396, "grad_norm": 2.3978047370910645, "learning_rate": 8.032180567244457e-07, "loss": 0.44491517543792725, "step": 1744 }, { "epoch": 1.883495145631068, "grad_norm": 1.8564910888671875, "learning_rate": 8.011974804471953e-07, "loss": 0.356891930103302, "step": 1746 }, { "epoch": 1.8856526429341964, "grad_norm": 1.3446942567825317, "learning_rate": 7.991785954167318e-07, "loss": 0.23551291227340698, "step": 1748 }, { "epoch": 1.8878101402373249, "grad_norm": 0.45997709035873413, "learning_rate": 7.971614130601109e-07, "loss": 0.4230949878692627, "step": 1750 }, { "epoch": 1.889967637540453, "grad_norm": 38.918373107910156, "learning_rate": 7.951459447947506e-07, "loss": 0.47550415992736816, "step": 1752 }, { "epoch": 1.8921251348435815, "grad_norm": 2.1779584884643555, "learning_rate": 7.931322020283658e-07, "loss": 0.34226706624031067, "step": 1754 }, { "epoch": 1.8942826321467097, "grad_norm": 1.3864846229553223, "learning_rate": 7.911201961589067e-07, "loss": 0.4829237163066864, "step": 1756 }, { "epoch": 1.896440129449838, "grad_norm": 13.083473205566406, "learning_rate": 7.89109938574491e-07, "loss": 0.4935177266597748, "step": 1758 }, { "epoch": 1.8985976267529665, "grad_norm": 1.8217555284500122, "learning_rate": 7.871014406533422e-07, "loss": 0.33267736434936523, "step": 1760 }, { "epoch": 1.900755124056095, "grad_norm": 1.1673423051834106, "learning_rate": 7.850947137637231e-07, "loss": 0.5361051559448242, "step": 1762 }, { "epoch": 1.9029126213592233, "grad_norm": 2.398650646209717, "learning_rate": 7.830897692638723e-07, "loss": 0.45928269624710083, "step": 1764 }, { "epoch": 1.9050701186623518, "grad_norm": 1.6035159826278687, "learning_rate": 7.810866185019411e-07, "loss": 0.40345799922943115, "step": 1766 }, { "epoch": 1.9072276159654802, "grad_norm": 3.5140364170074463, "learning_rate": 7.790852728159263e-07, "loss": 0.4371829032897949, "step": 1768 }, { "epoch": 1.9093851132686084, "grad_norm": 2.60213041305542, "learning_rate": 7.770857435336096e-07, "loss": 0.4061744213104248, "step": 1770 }, { "epoch": 1.9115426105717368, "grad_norm": 1.3775845766067505, "learning_rate": 7.750880419724901e-07, "loss": 0.4554259181022644, "step": 1772 }, { "epoch": 1.913700107874865, "grad_norm": 1.0794130563735962, "learning_rate": 7.730921794397233e-07, "loss": 0.5084207057952881, "step": 1774 }, { "epoch": 1.9158576051779934, "grad_norm": 2.0664308071136475, "learning_rate": 7.710981672320547e-07, "loss": 0.41404515504837036, "step": 1776 }, { "epoch": 1.9180151024811218, "grad_norm": 2.6501731872558594, "learning_rate": 7.691060166357565e-07, "loss": 0.43099674582481384, "step": 1778 }, { "epoch": 1.9201725997842503, "grad_norm": 1.3328322172164917, "learning_rate": 7.671157389265657e-07, "loss": 0.28375762701034546, "step": 1780 }, { "epoch": 1.9223300970873787, "grad_norm": 1.3328646421432495, "learning_rate": 7.651273453696166e-07, "loss": 0.3038649260997772, "step": 1782 }, { "epoch": 1.924487594390507, "grad_norm": 1.6350358724594116, "learning_rate": 7.631408472193804e-07, "loss": 0.37957847118377686, "step": 1784 }, { "epoch": 1.9266450916936355, "grad_norm": 2.626065731048584, "learning_rate": 7.611562557195992e-07, "loss": 0.5506111979484558, "step": 1786 }, { "epoch": 1.9288025889967637, "grad_norm": 1.2828840017318726, "learning_rate": 7.591735821032246e-07, "loss": 0.27725642919540405, "step": 1788 }, { "epoch": 1.9309600862998921, "grad_norm": 2.3256094455718994, "learning_rate": 7.571928375923513e-07, "loss": 0.5789600014686584, "step": 1790 }, { "epoch": 1.9331175836030206, "grad_norm": 1.5279923677444458, "learning_rate": 7.552140333981565e-07, "loss": 0.3936736583709717, "step": 1792 }, { "epoch": 1.9352750809061487, "grad_norm": 1.2446404695510864, "learning_rate": 7.532371807208333e-07, "loss": 0.3211576044559479, "step": 1794 }, { "epoch": 1.9374325782092772, "grad_norm": 4.188495635986328, "learning_rate": 7.51262290749531e-07, "loss": 0.6068055629730225, "step": 1796 }, { "epoch": 1.9395900755124056, "grad_norm": 0.5956944227218628, "learning_rate": 7.49289374662289e-07, "loss": 0.49566900730133057, "step": 1798 }, { "epoch": 1.941747572815534, "grad_norm": 1.9421483278274536, "learning_rate": 7.473184436259737e-07, "loss": 0.6433679461479187, "step": 1800 }, { "epoch": 1.9439050701186624, "grad_norm": 1.5543241500854492, "learning_rate": 7.453495087962171e-07, "loss": 0.24959444999694824, "step": 1802 }, { "epoch": 1.9460625674217908, "grad_norm": 1.5686687231063843, "learning_rate": 7.433825813173513e-07, "loss": 0.5422605872154236, "step": 1804 }, { "epoch": 1.948220064724919, "grad_norm": 1.1901352405548096, "learning_rate": 7.414176723223484e-07, "loss": 0.28422844409942627, "step": 1806 }, { "epoch": 1.9503775620280475, "grad_norm": 1.0920721292495728, "learning_rate": 7.394547929327533e-07, "loss": 0.3562416732311249, "step": 1808 }, { "epoch": 1.9525350593311759, "grad_norm": 1.3346633911132812, "learning_rate": 7.374939542586249e-07, "loss": 0.43261829018592834, "step": 1810 }, { "epoch": 1.954692556634304, "grad_norm": 0.9234395623207092, "learning_rate": 7.355351673984718e-07, "loss": 0.1937822848558426, "step": 1812 }, { "epoch": 1.9568500539374325, "grad_norm": 1.3584299087524414, "learning_rate": 7.335784434391874e-07, "loss": 0.48144611716270447, "step": 1814 }, { "epoch": 1.959007551240561, "grad_norm": 3.7962646484375, "learning_rate": 7.316237934559906e-07, "loss": 0.5200175642967224, "step": 1816 }, { "epoch": 1.9611650485436893, "grad_norm": 2.161349058151245, "learning_rate": 7.296712285123603e-07, "loss": 0.535617470741272, "step": 1818 }, { "epoch": 1.9633225458468178, "grad_norm": 1.4636130332946777, "learning_rate": 7.277207596599746e-07, "loss": 0.5756503343582153, "step": 1820 }, { "epoch": 1.9654800431499462, "grad_norm": 1.4813960790634155, "learning_rate": 7.25772397938647e-07, "loss": 0.47899457812309265, "step": 1822 }, { "epoch": 1.9676375404530746, "grad_norm": 1.616752028465271, "learning_rate": 7.238261543762651e-07, "loss": 0.446144700050354, "step": 1824 }, { "epoch": 1.9697950377562028, "grad_norm": 3.5466485023498535, "learning_rate": 7.218820399887274e-07, "loss": 0.6149036884307861, "step": 1826 }, { "epoch": 1.9719525350593312, "grad_norm": 1.1728744506835938, "learning_rate": 7.199400657798802e-07, "loss": 0.3574240207672119, "step": 1828 }, { "epoch": 1.9741100323624594, "grad_norm": 1.4667657613754272, "learning_rate": 7.180002427414584e-07, "loss": 0.4160582721233368, "step": 1830 }, { "epoch": 1.9762675296655878, "grad_norm": 1.9609451293945312, "learning_rate": 7.160625818530175e-07, "loss": 0.4743785560131073, "step": 1832 }, { "epoch": 1.9784250269687162, "grad_norm": 1.3401987552642822, "learning_rate": 7.141270940818789e-07, "loss": 0.4877952039241791, "step": 1834 }, { "epoch": 1.9805825242718447, "grad_norm": 2.090475559234619, "learning_rate": 7.121937903830615e-07, "loss": 0.4774564206600189, "step": 1836 }, { "epoch": 1.982740021574973, "grad_norm": 1.7114410400390625, "learning_rate": 7.102626816992228e-07, "loss": 0.5767732262611389, "step": 1838 }, { "epoch": 1.9848975188781015, "grad_norm": 1.462022304534912, "learning_rate": 7.08333778960597e-07, "loss": 0.4107472598552704, "step": 1840 }, { "epoch": 1.98705501618123, "grad_norm": 20.44681739807129, "learning_rate": 7.064070930849315e-07, "loss": 0.44799551367759705, "step": 1842 }, { "epoch": 1.9892125134843581, "grad_norm": 1.4650821685791016, "learning_rate": 7.044826349774271e-07, "loss": 0.45217186212539673, "step": 1844 }, { "epoch": 1.9913700107874865, "grad_norm": 1.2852694988250732, "learning_rate": 7.025604155306735e-07, "loss": 0.5372745394706726, "step": 1846 }, { "epoch": 1.9935275080906147, "grad_norm": 3.7176291942596436, "learning_rate": 7.006404456245918e-07, "loss": 0.3280995488166809, "step": 1848 }, { "epoch": 1.9956850053937432, "grad_norm": 2.549321174621582, "learning_rate": 6.987227361263687e-07, "loss": 0.419173002243042, "step": 1850 }, { "epoch": 1.9978425026968716, "grad_norm": 0.8345087170600891, "learning_rate": 6.968072978903971e-07, "loss": 0.2861520051956177, "step": 1852 }, { "epoch": 2.0, "grad_norm": 1.6546863317489624, "learning_rate": 6.94894141758215e-07, "loss": 0.38616907596588135, "step": 1854 }, { "epoch": 2.0021574973031284, "grad_norm": 2.098503351211548, "learning_rate": 6.929832785584435e-07, "loss": 0.41417112946510315, "step": 1856 }, { "epoch": 2.004314994606257, "grad_norm": 1.7302627563476562, "learning_rate": 6.910747191067247e-07, "loss": 0.2879858613014221, "step": 1858 }, { "epoch": 2.0064724919093853, "grad_norm": 1.0965220928192139, "learning_rate": 6.891684742056614e-07, "loss": 0.3841347396373749, "step": 1860 }, { "epoch": 2.0086299892125137, "grad_norm": 1.9664329290390015, "learning_rate": 6.872645546447569e-07, "loss": 0.13829857110977173, "step": 1862 }, { "epoch": 2.0107874865156417, "grad_norm": 1.8741497993469238, "learning_rate": 6.85362971200352e-07, "loss": 0.3561688959598541, "step": 1864 }, { "epoch": 2.01294498381877, "grad_norm": 1.2023261785507202, "learning_rate": 6.834637346355648e-07, "loss": 0.24942456185817719, "step": 1866 }, { "epoch": 2.0151024811218985, "grad_norm": 3.097130298614502, "learning_rate": 6.815668557002304e-07, "loss": 0.16739408671855927, "step": 1868 }, { "epoch": 2.017259978425027, "grad_norm": 2.240835189819336, "learning_rate": 6.796723451308395e-07, "loss": 0.287383109331131, "step": 1870 }, { "epoch": 2.0194174757281553, "grad_norm": 1.164444923400879, "learning_rate": 6.777802136504772e-07, "loss": 0.27731871604919434, "step": 1872 }, { "epoch": 2.0215749730312838, "grad_norm": 3.692326307296753, "learning_rate": 6.758904719687624e-07, "loss": 0.42448198795318604, "step": 1874 }, { "epoch": 2.023732470334412, "grad_norm": 12.38591480255127, "learning_rate": 6.740031307817894e-07, "loss": 0.46731823682785034, "step": 1876 }, { "epoch": 2.0258899676375406, "grad_norm": 1.388956904411316, "learning_rate": 6.72118200772063e-07, "loss": 0.45638781785964966, "step": 1878 }, { "epoch": 2.028047464940669, "grad_norm": 2.0737364292144775, "learning_rate": 6.702356926084422e-07, "loss": 0.26328131556510925, "step": 1880 }, { "epoch": 2.030204962243797, "grad_norm": 4.843571662902832, "learning_rate": 6.683556169460786e-07, "loss": 0.3340507447719574, "step": 1882 }, { "epoch": 2.0323624595469254, "grad_norm": 0.6562245488166809, "learning_rate": 6.664779844263533e-07, "loss": 0.36223921179771423, "step": 1884 }, { "epoch": 2.034519956850054, "grad_norm": 1.3073861598968506, "learning_rate": 6.646028056768215e-07, "loss": 0.3697828948497772, "step": 1886 }, { "epoch": 2.0366774541531822, "grad_norm": 0.8905764818191528, "learning_rate": 6.627300913111484e-07, "loss": 0.23265878856182098, "step": 1888 }, { "epoch": 2.0388349514563107, "grad_norm": 1.8539386987686157, "learning_rate": 6.608598519290517e-07, "loss": 0.2889014780521393, "step": 1890 }, { "epoch": 2.040992448759439, "grad_norm": 1.5576704740524292, "learning_rate": 6.589920981162384e-07, "loss": 0.2241078019142151, "step": 1892 }, { "epoch": 2.0431499460625675, "grad_norm": 1.355921983718872, "learning_rate": 6.5712684044435e-07, "loss": 0.3171182870864868, "step": 1894 }, { "epoch": 2.045307443365696, "grad_norm": 1.5620806217193604, "learning_rate": 6.552640894708971e-07, "loss": 0.2683061361312866, "step": 1896 }, { "epoch": 2.0474649406688243, "grad_norm": 1.372431755065918, "learning_rate": 6.534038557392031e-07, "loss": 0.3898204267024994, "step": 1898 }, { "epoch": 2.0496224379719523, "grad_norm": 3.4002630710601807, "learning_rate": 6.515461497783441e-07, "loss": 0.18718461692333221, "step": 1900 }, { "epoch": 2.0517799352750807, "grad_norm": 0.8313205242156982, "learning_rate": 6.49690982103088e-07, "loss": 0.26798462867736816, "step": 1902 }, { "epoch": 2.053937432578209, "grad_norm": 0.6434590816497803, "learning_rate": 6.478383632138364e-07, "loss": 0.20526859164237976, "step": 1904 }, { "epoch": 2.0560949298813376, "grad_norm": 1.930409550666809, "learning_rate": 6.459883035965637e-07, "loss": 0.13682284951210022, "step": 1906 }, { "epoch": 2.058252427184466, "grad_norm": 1.254040241241455, "learning_rate": 6.441408137227597e-07, "loss": 0.21237482130527496, "step": 1908 }, { "epoch": 2.0604099244875944, "grad_norm": 2.053589105606079, "learning_rate": 6.422959040493687e-07, "loss": 0.30055493116378784, "step": 1910 }, { "epoch": 2.062567421790723, "grad_norm": 2.110161304473877, "learning_rate": 6.404535850187305e-07, "loss": 0.30984535813331604, "step": 1912 }, { "epoch": 2.0647249190938513, "grad_norm": 1.3705302476882935, "learning_rate": 6.386138670585226e-07, "loss": 0.3130619525909424, "step": 1914 }, { "epoch": 2.0668824163969797, "grad_norm": 2.5152392387390137, "learning_rate": 6.367767605816994e-07, "loss": 0.30358609557151794, "step": 1916 }, { "epoch": 2.0690399137001076, "grad_norm": 2.5105814933776855, "learning_rate": 6.349422759864343e-07, "loss": 0.3234387934207916, "step": 1918 }, { "epoch": 2.071197411003236, "grad_norm": 10.742331504821777, "learning_rate": 6.331104236560605e-07, "loss": 0.2621289789676666, "step": 1920 }, { "epoch": 2.0733549083063645, "grad_norm": 5.28401517868042, "learning_rate": 6.312812139590132e-07, "loss": 0.3532802164554596, "step": 1922 }, { "epoch": 2.075512405609493, "grad_norm": 1.3391534090042114, "learning_rate": 6.294546572487688e-07, "loss": 0.32384493947029114, "step": 1924 }, { "epoch": 2.0776699029126213, "grad_norm": 2.9051060676574707, "learning_rate": 6.276307638637881e-07, "loss": 0.38077038526535034, "step": 1926 }, { "epoch": 2.0798274002157497, "grad_norm": 1.0339443683624268, "learning_rate": 6.258095441274582e-07, "loss": 0.348030686378479, "step": 1928 }, { "epoch": 2.081984897518878, "grad_norm": 1.0094170570373535, "learning_rate": 6.239910083480317e-07, "loss": 0.22280654311180115, "step": 1930 }, { "epoch": 2.0841423948220066, "grad_norm": 6.025458812713623, "learning_rate": 6.221751668185706e-07, "loss": 0.2871300280094147, "step": 1932 }, { "epoch": 2.086299892125135, "grad_norm": 1.6982316970825195, "learning_rate": 6.203620298168865e-07, "loss": 0.36530792713165283, "step": 1934 }, { "epoch": 2.0884573894282634, "grad_norm": 1.1299806833267212, "learning_rate": 6.185516076054848e-07, "loss": 0.291080117225647, "step": 1936 }, { "epoch": 2.0906148867313914, "grad_norm": 2.090158700942993, "learning_rate": 6.167439104315022e-07, "loss": 0.28274258971214294, "step": 1938 }, { "epoch": 2.09277238403452, "grad_norm": 1.412570595741272, "learning_rate": 6.14938948526654e-07, "loss": 0.14839334785938263, "step": 1940 }, { "epoch": 2.0949298813376482, "grad_norm": 1.48866868019104, "learning_rate": 6.131367321071736e-07, "loss": 0.20612022280693054, "step": 1942 }, { "epoch": 2.0970873786407767, "grad_norm": 1.3251118659973145, "learning_rate": 6.113372713737521e-07, "loss": 0.2410675585269928, "step": 1944 }, { "epoch": 2.099244875943905, "grad_norm": 1.649924635887146, "learning_rate": 6.095405765114863e-07, "loss": 0.4107120931148529, "step": 1946 }, { "epoch": 2.1014023732470335, "grad_norm": 2.8146562576293945, "learning_rate": 6.077466576898161e-07, "loss": 0.22407367825508118, "step": 1948 }, { "epoch": 2.103559870550162, "grad_norm": 1.3203238248825073, "learning_rate": 6.05955525062469e-07, "loss": 0.3075147867202759, "step": 1950 }, { "epoch": 2.1057173678532903, "grad_norm": 2.156553268432617, "learning_rate": 6.04167188767403e-07, "loss": 0.32935836911201477, "step": 1952 }, { "epoch": 2.1078748651564188, "grad_norm": 3.6382105350494385, "learning_rate": 6.023816589267486e-07, "loss": 0.3246581554412842, "step": 1954 }, { "epoch": 2.1100323624595467, "grad_norm": 4.373478412628174, "learning_rate": 6.005989456467511e-07, "loss": 0.2509233355522156, "step": 1956 }, { "epoch": 2.112189859762675, "grad_norm": 1.778868556022644, "learning_rate": 5.988190590177132e-07, "loss": 0.3160122036933899, "step": 1958 }, { "epoch": 2.1143473570658036, "grad_norm": 0.4364719092845917, "learning_rate": 5.970420091139407e-07, "loss": 0.04425504431128502, "step": 1960 }, { "epoch": 2.116504854368932, "grad_norm": 1.821292519569397, "learning_rate": 5.952678059936811e-07, "loss": 0.18517985939979553, "step": 1962 }, { "epoch": 2.1186623516720604, "grad_norm": 3.6087453365325928, "learning_rate": 5.934964596990697e-07, "loss": 0.2705124616622925, "step": 1964 }, { "epoch": 2.120819848975189, "grad_norm": 1.464837670326233, "learning_rate": 5.917279802560719e-07, "loss": 0.21107757091522217, "step": 1966 }, { "epoch": 2.1229773462783172, "grad_norm": 4.029551029205322, "learning_rate": 5.899623776744268e-07, "loss": 0.23950833082199097, "step": 1968 }, { "epoch": 2.1251348435814457, "grad_norm": 1.4416351318359375, "learning_rate": 5.881996619475898e-07, "loss": 0.3448520600795746, "step": 1970 }, { "epoch": 2.127292340884574, "grad_norm": 2.3087425231933594, "learning_rate": 5.864398430526765e-07, "loss": 0.18799349665641785, "step": 1972 }, { "epoch": 2.129449838187702, "grad_norm": 0.4913981556892395, "learning_rate": 5.846829309504064e-07, "loss": 0.23318088054656982, "step": 1974 }, { "epoch": 2.1316073354908305, "grad_norm": 0.16859030723571777, "learning_rate": 5.829289355850464e-07, "loss": 0.22492466866970062, "step": 1976 }, { "epoch": 2.133764832793959, "grad_norm": 1.511398196220398, "learning_rate": 5.811778668843541e-07, "loss": 0.20076408982276917, "step": 1978 }, { "epoch": 2.1359223300970873, "grad_norm": 3.2070164680480957, "learning_rate": 5.794297347595216e-07, "loss": 0.2566869258880615, "step": 1980 }, { "epoch": 2.1380798274002157, "grad_norm": 1.4940425157546997, "learning_rate": 5.77684549105121e-07, "loss": 0.34161150455474854, "step": 1982 }, { "epoch": 2.140237324703344, "grad_norm": 1.6653574705123901, "learning_rate": 5.75942319799046e-07, "loss": 0.2889230251312256, "step": 1984 }, { "epoch": 2.1423948220064726, "grad_norm": 1.7484220266342163, "learning_rate": 5.742030567024571e-07, "loss": 0.2357415109872818, "step": 1986 }, { "epoch": 2.144552319309601, "grad_norm": 2.1993203163146973, "learning_rate": 5.724667696597274e-07, "loss": 0.43604907393455505, "step": 1988 }, { "epoch": 2.1467098166127294, "grad_norm": 3.952744960784912, "learning_rate": 5.707334684983824e-07, "loss": 0.1417762041091919, "step": 1990 }, { "epoch": 2.148867313915858, "grad_norm": 1.5836926698684692, "learning_rate": 5.690031630290504e-07, "loss": 0.3969094753265381, "step": 1992 }, { "epoch": 2.151024811218986, "grad_norm": 2.5084621906280518, "learning_rate": 5.672758630454016e-07, "loss": 0.3280077576637268, "step": 1994 }, { "epoch": 2.1531823085221142, "grad_norm": 2.075791835784912, "learning_rate": 5.655515783240958e-07, "loss": 0.23175282776355743, "step": 1996 }, { "epoch": 2.1553398058252426, "grad_norm": 1.316325306892395, "learning_rate": 5.63830318624726e-07, "loss": 0.3914681077003479, "step": 1998 }, { "epoch": 2.157497303128371, "grad_norm": 1.3925992250442505, "learning_rate": 5.621120936897634e-07, "loss": 0.2599402368068695, "step": 2000 }, { "epoch": 2.1596548004314995, "grad_norm": 0.6926285624504089, "learning_rate": 5.60396913244503e-07, "loss": 0.20738252997398376, "step": 2002 }, { "epoch": 2.161812297734628, "grad_norm": 2.7593421936035156, "learning_rate": 5.586847869970058e-07, "loss": 0.3029998242855072, "step": 2004 }, { "epoch": 2.1639697950377563, "grad_norm": 1.116198182106018, "learning_rate": 5.569757246380473e-07, "loss": 0.3626508414745331, "step": 2006 }, { "epoch": 2.1661272923408847, "grad_norm": 1.4919781684875488, "learning_rate": 5.552697358410607e-07, "loss": 0.1747465282678604, "step": 2008 }, { "epoch": 2.168284789644013, "grad_norm": 2.0446255207061768, "learning_rate": 5.535668302620828e-07, "loss": 0.3265528082847595, "step": 2010 }, { "epoch": 2.170442286947141, "grad_norm": 1.902366280555725, "learning_rate": 5.518670175396986e-07, "loss": 0.20548182725906372, "step": 2012 }, { "epoch": 2.1725997842502696, "grad_norm": 1.5543972253799438, "learning_rate": 5.50170307294988e-07, "loss": 0.30606332421302795, "step": 2014 }, { "epoch": 2.174757281553398, "grad_norm": 4.172656059265137, "learning_rate": 5.484767091314703e-07, "loss": 0.43032437562942505, "step": 2016 }, { "epoch": 2.1769147788565264, "grad_norm": 1.8856433629989624, "learning_rate": 5.467862326350495e-07, "loss": 0.3198752701282501, "step": 2018 }, { "epoch": 2.179072276159655, "grad_norm": 2.642690420150757, "learning_rate": 5.450988873739622e-07, "loss": 0.39496558904647827, "step": 2020 }, { "epoch": 2.1812297734627832, "grad_norm": 1.6509486436843872, "learning_rate": 5.434146828987205e-07, "loss": 0.3583213686943054, "step": 2022 }, { "epoch": 2.1833872707659117, "grad_norm": 5.141426086425781, "learning_rate": 5.417336287420602e-07, "loss": 0.3227855861186981, "step": 2024 }, { "epoch": 2.18554476806904, "grad_norm": 2.045142412185669, "learning_rate": 5.400557344188854e-07, "loss": 0.3605496883392334, "step": 2026 }, { "epoch": 2.1877022653721685, "grad_norm": 1.7817591428756714, "learning_rate": 5.383810094262164e-07, "loss": 0.2572648525238037, "step": 2028 }, { "epoch": 2.1898597626752965, "grad_norm": 1.3152915239334106, "learning_rate": 5.367094632431337e-07, "loss": 0.23091773688793182, "step": 2030 }, { "epoch": 2.192017259978425, "grad_norm": 2.0624170303344727, "learning_rate": 5.350411053307258e-07, "loss": 0.32704049348831177, "step": 2032 }, { "epoch": 2.1941747572815533, "grad_norm": 0.41580072045326233, "learning_rate": 5.33375945132036e-07, "loss": 0.1712155044078827, "step": 2034 }, { "epoch": 2.1963322545846817, "grad_norm": 6.342857360839844, "learning_rate": 5.317139920720069e-07, "loss": 0.11315549165010452, "step": 2036 }, { "epoch": 2.19848975188781, "grad_norm": 2.2447385787963867, "learning_rate": 5.300552555574296e-07, "loss": 0.2954585552215576, "step": 2038 }, { "epoch": 2.2006472491909386, "grad_norm": 1.2444943189620972, "learning_rate": 5.28399744976889e-07, "loss": 0.3177575170993805, "step": 2040 }, { "epoch": 2.202804746494067, "grad_norm": 1.3165860176086426, "learning_rate": 5.267474697007111e-07, "loss": 0.18876095116138458, "step": 2042 }, { "epoch": 2.2049622437971954, "grad_norm": 1.2250696420669556, "learning_rate": 5.250984390809092e-07, "loss": 0.36252525448799133, "step": 2044 }, { "epoch": 2.207119741100324, "grad_norm": 2.139589309692383, "learning_rate": 5.234526624511319e-07, "loss": 0.30584216117858887, "step": 2046 }, { "epoch": 2.209277238403452, "grad_norm": 1.1470484733581543, "learning_rate": 5.218101491266108e-07, "loss": 0.27216002345085144, "step": 2048 }, { "epoch": 2.2114347357065802, "grad_norm": 1.030771017074585, "learning_rate": 5.201709084041051e-07, "loss": 0.26957935094833374, "step": 2050 }, { "epoch": 2.2135922330097086, "grad_norm": 3.0721845626831055, "learning_rate": 5.185349495618523e-07, "loss": 0.35413840413093567, "step": 2052 }, { "epoch": 2.215749730312837, "grad_norm": 1.2705237865447998, "learning_rate": 5.169022818595139e-07, "loss": 0.2741287648677826, "step": 2054 }, { "epoch": 2.2179072276159655, "grad_norm": 1.2934070825576782, "learning_rate": 5.152729145381226e-07, "loss": 0.4308694899082184, "step": 2056 }, { "epoch": 2.220064724919094, "grad_norm": 1.0611752271652222, "learning_rate": 5.136468568200319e-07, "loss": 0.3000924587249756, "step": 2058 }, { "epoch": 2.2222222222222223, "grad_norm": 1.2346817255020142, "learning_rate": 5.120241179088615e-07, "loss": 0.24799837172031403, "step": 2060 }, { "epoch": 2.2243797195253507, "grad_norm": 2.2104601860046387, "learning_rate": 5.10404706989447e-07, "loss": 0.26346859335899353, "step": 2062 }, { "epoch": 2.226537216828479, "grad_norm": 1.240453839302063, "learning_rate": 5.087886332277866e-07, "loss": 0.22416910529136658, "step": 2064 }, { "epoch": 2.228694714131607, "grad_norm": 1.543516755104065, "learning_rate": 5.071759057709915e-07, "loss": 0.2962421178817749, "step": 2066 }, { "epoch": 2.2308522114347356, "grad_norm": 1.8429937362670898, "learning_rate": 5.055665337472306e-07, "loss": 0.21297654509544373, "step": 2068 }, { "epoch": 2.233009708737864, "grad_norm": 1.4537338018417358, "learning_rate": 5.039605262656816e-07, "loss": 0.15425504744052887, "step": 2070 }, { "epoch": 2.2351672060409924, "grad_norm": 2.0566556453704834, "learning_rate": 5.023578924164795e-07, "loss": 0.4284798502922058, "step": 2072 }, { "epoch": 2.237324703344121, "grad_norm": 1.6528476476669312, "learning_rate": 5.007586412706629e-07, "loss": 0.3135349750518799, "step": 2074 }, { "epoch": 2.2394822006472492, "grad_norm": 2.057396173477173, "learning_rate": 4.991627818801245e-07, "loss": 0.3012422025203705, "step": 2076 }, { "epoch": 2.2416396979503777, "grad_norm": 2.47934889793396, "learning_rate": 4.975703232775593e-07, "loss": 0.24459701776504517, "step": 2078 }, { "epoch": 2.243797195253506, "grad_norm": 1.943104863166809, "learning_rate": 4.959812744764143e-07, "loss": 0.3131766617298126, "step": 2080 }, { "epoch": 2.2459546925566345, "grad_norm": 1.1001262664794922, "learning_rate": 4.943956444708357e-07, "loss": 0.21205957233905792, "step": 2082 }, { "epoch": 2.2481121898597625, "grad_norm": 4.736835956573486, "learning_rate": 4.928134422356194e-07, "loss": 0.2981138825416565, "step": 2084 }, { "epoch": 2.250269687162891, "grad_norm": 2.2798216342926025, "learning_rate": 4.912346767261605e-07, "loss": 0.1790456920862198, "step": 2086 }, { "epoch": 2.2524271844660193, "grad_norm": 1.6988016366958618, "learning_rate": 4.896593568784008e-07, "loss": 0.35665163397789, "step": 2088 }, { "epoch": 2.2545846817691477, "grad_norm": 8.441971778869629, "learning_rate": 4.880874916087802e-07, "loss": 0.42117640376091003, "step": 2090 }, { "epoch": 2.256742179072276, "grad_norm": 1.6971404552459717, "learning_rate": 4.865190898141847e-07, "loss": 0.3492169678211212, "step": 2092 }, { "epoch": 2.2588996763754046, "grad_norm": 0.9748818874359131, "learning_rate": 4.849541603718984e-07, "loss": 0.24743738770484924, "step": 2094 }, { "epoch": 2.261057173678533, "grad_norm": 1.4215339422225952, "learning_rate": 4.833927121395488e-07, "loss": 0.3036370277404785, "step": 2096 }, { "epoch": 2.2632146709816614, "grad_norm": 1.2809211015701294, "learning_rate": 4.818347539550621e-07, "loss": 0.2065061330795288, "step": 2098 }, { "epoch": 2.26537216828479, "grad_norm": 1.3214771747589111, "learning_rate": 4.802802946366094e-07, "loss": 0.15301240980625153, "step": 2100 }, { "epoch": 2.267529665587918, "grad_norm": 1.464438557624817, "learning_rate": 4.787293429825575e-07, "loss": 0.24719694256782532, "step": 2102 }, { "epoch": 2.269687162891046, "grad_norm": 1.064639925956726, "learning_rate": 4.771819077714207e-07, "loss": 0.25772497057914734, "step": 2104 }, { "epoch": 2.2718446601941746, "grad_norm": 1.1112815141677856, "learning_rate": 4.756379977618093e-07, "loss": 0.3478604257106781, "step": 2106 }, { "epoch": 2.274002157497303, "grad_norm": 2.554323196411133, "learning_rate": 4.740976216923803e-07, "loss": 0.2710329592227936, "step": 2108 }, { "epoch": 2.2761596548004315, "grad_norm": 1.0012887716293335, "learning_rate": 4.725607882817886e-07, "loss": 0.17554689943790436, "step": 2110 }, { "epoch": 2.27831715210356, "grad_norm": 7.408605098724365, "learning_rate": 4.710275062286379e-07, "loss": 0.32163527607917786, "step": 2112 }, { "epoch": 2.2804746494066883, "grad_norm": 1.3676038980484009, "learning_rate": 4.694977842114303e-07, "loss": 0.2676321864128113, "step": 2114 }, { "epoch": 2.2826321467098167, "grad_norm": 3.824159860610962, "learning_rate": 4.6797163088851777e-07, "loss": 0.3604358434677124, "step": 2116 }, { "epoch": 2.284789644012945, "grad_norm": 2.4927074909210205, "learning_rate": 4.6644905489805377e-07, "loss": 0.19784438610076904, "step": 2118 }, { "epoch": 2.286947141316073, "grad_norm": 2.12221097946167, "learning_rate": 4.6493006485794325e-07, "loss": 0.14516694843769073, "step": 2120 }, { "epoch": 2.2891046386192015, "grad_norm": 2.6523265838623047, "learning_rate": 4.6341466936579445e-07, "loss": 0.4613579511642456, "step": 2122 }, { "epoch": 2.29126213592233, "grad_norm": 1.4518303871154785, "learning_rate": 4.6190287699887e-07, "loss": 0.29724588990211487, "step": 2124 }, { "epoch": 2.2934196332254584, "grad_norm": 1.7566689252853394, "learning_rate": 4.6039469631403926e-07, "loss": 0.29199060797691345, "step": 2126 }, { "epoch": 2.295577130528587, "grad_norm": 1.4850506782531738, "learning_rate": 4.588901358477287e-07, "loss": 0.34357935190200806, "step": 2128 }, { "epoch": 2.2977346278317152, "grad_norm": 2.186891794204712, "learning_rate": 4.5738920411587333e-07, "loss": 0.35363560914993286, "step": 2130 }, { "epoch": 2.2998921251348436, "grad_norm": 1.4798485040664673, "learning_rate": 4.5589190961387085e-07, "loss": 0.345289021730423, "step": 2132 }, { "epoch": 2.302049622437972, "grad_norm": 1.1641236543655396, "learning_rate": 4.543982608165307e-07, "loss": 0.3444761633872986, "step": 2134 }, { "epoch": 2.3042071197411005, "grad_norm": 5.22445821762085, "learning_rate": 4.529082661780277e-07, "loss": 0.4176110625267029, "step": 2136 }, { "epoch": 2.3063646170442285, "grad_norm": 2.9714982509613037, "learning_rate": 4.514219341318534e-07, "loss": 0.3582867383956909, "step": 2138 }, { "epoch": 2.308522114347357, "grad_norm": 1.2270290851593018, "learning_rate": 4.499392730907701e-07, "loss": 0.19157586991786957, "step": 2140 }, { "epoch": 2.3106796116504853, "grad_norm": 2.424367904663086, "learning_rate": 4.484602914467599e-07, "loss": 0.17550167441368103, "step": 2142 }, { "epoch": 2.3128371089536137, "grad_norm": 6.250462532043457, "learning_rate": 4.4698499757098085e-07, "loss": 0.2245817482471466, "step": 2144 }, { "epoch": 2.314994606256742, "grad_norm": 1.6150588989257812, "learning_rate": 4.4551339981371805e-07, "loss": 0.22309915721416473, "step": 2146 }, { "epoch": 2.3171521035598706, "grad_norm": 2.557511568069458, "learning_rate": 4.4404550650433423e-07, "loss": 0.31364479660987854, "step": 2148 }, { "epoch": 2.319309600862999, "grad_norm": 1.9506558179855347, "learning_rate": 4.4258132595122697e-07, "loss": 0.3908032774925232, "step": 2150 }, { "epoch": 2.3214670981661274, "grad_norm": 1.9863252639770508, "learning_rate": 4.411208664417779e-07, "loss": 0.33292022347450256, "step": 2152 }, { "epoch": 2.323624595469256, "grad_norm": 2.143460512161255, "learning_rate": 4.3966413624230847e-07, "loss": 0.1916477084159851, "step": 2154 }, { "epoch": 2.325782092772384, "grad_norm": 3.893472194671631, "learning_rate": 4.3821114359803016e-07, "loss": 0.33617085218429565, "step": 2156 }, { "epoch": 2.3279395900755127, "grad_norm": 2.143810272216797, "learning_rate": 4.367618967330011e-07, "loss": 0.3440120220184326, "step": 2158 }, { "epoch": 2.3300970873786406, "grad_norm": 1.6093735694885254, "learning_rate": 4.35316403850078e-07, "loss": 0.18562518060207367, "step": 2160 }, { "epoch": 2.332254584681769, "grad_norm": 1.975888729095459, "learning_rate": 4.3387467313086825e-07, "loss": 0.15831519663333893, "step": 2162 }, { "epoch": 2.3344120819848975, "grad_norm": 1.708641529083252, "learning_rate": 4.324367127356868e-07, "loss": 0.2527565360069275, "step": 2164 }, { "epoch": 2.336569579288026, "grad_norm": 1.4729366302490234, "learning_rate": 4.310025308035073e-07, "loss": 0.2772301435470581, "step": 2166 }, { "epoch": 2.3387270765911543, "grad_norm": 1.6301394701004028, "learning_rate": 4.295721354519172e-07, "loss": 0.3133164644241333, "step": 2168 }, { "epoch": 2.3408845738942827, "grad_norm": 3.0249712467193604, "learning_rate": 4.281455347770713e-07, "loss": 0.24287529289722443, "step": 2170 }, { "epoch": 2.343042071197411, "grad_norm": 1.6311941146850586, "learning_rate": 4.2672273685364703e-07, "loss": 0.4204927086830139, "step": 2172 }, { "epoch": 2.3451995685005396, "grad_norm": 5.362087249755859, "learning_rate": 4.253037497347971e-07, "loss": 0.37392908334732056, "step": 2174 }, { "epoch": 2.347357065803668, "grad_norm": 1.4209610223770142, "learning_rate": 4.2388858145210506e-07, "loss": 0.21947862207889557, "step": 2176 }, { "epoch": 2.349514563106796, "grad_norm": 1.5687469244003296, "learning_rate": 4.224772400155399e-07, "loss": 0.36956965923309326, "step": 2178 }, { "epoch": 2.3516720604099244, "grad_norm": 1.6838412284851074, "learning_rate": 4.2106973341340976e-07, "loss": 0.23953932523727417, "step": 2180 }, { "epoch": 2.353829557713053, "grad_norm": 4.918888568878174, "learning_rate": 4.1966606961231766e-07, "loss": 0.3331076502799988, "step": 2182 }, { "epoch": 2.355987055016181, "grad_norm": 1.4841556549072266, "learning_rate": 4.182662565571154e-07, "loss": 0.2150951325893402, "step": 2184 }, { "epoch": 2.3581445523193096, "grad_norm": 2.378197193145752, "learning_rate": 4.168703021708605e-07, "loss": 0.442268043756485, "step": 2186 }, { "epoch": 2.360302049622438, "grad_norm": 1.1213322877883911, "learning_rate": 4.154782143547691e-07, "loss": 0.21381919085979462, "step": 2188 }, { "epoch": 2.3624595469255665, "grad_norm": 1.2793383598327637, "learning_rate": 4.140900009881722e-07, "loss": 0.297492116689682, "step": 2190 }, { "epoch": 2.364617044228695, "grad_norm": 1.0152313709259033, "learning_rate": 4.127056699284719e-07, "loss": 0.2662775218486786, "step": 2192 }, { "epoch": 2.3667745415318233, "grad_norm": 3.4524388313293457, "learning_rate": 4.1132522901109547e-07, "loss": 0.1951354295015335, "step": 2194 }, { "epoch": 2.3689320388349513, "grad_norm": 5.245743751525879, "learning_rate": 4.099486860494517e-07, "loss": 0.3872916102409363, "step": 2196 }, { "epoch": 2.3710895361380797, "grad_norm": 1.206447720527649, "learning_rate": 4.085760488348866e-07, "loss": 0.24260494112968445, "step": 2198 }, { "epoch": 2.373247033441208, "grad_norm": 1.1352065801620483, "learning_rate": 4.0720732513663985e-07, "loss": 0.24157175421714783, "step": 2200 }, { "epoch": 2.3754045307443366, "grad_norm": 1.7284468412399292, "learning_rate": 4.0584252270179975e-07, "loss": 0.24914561212062836, "step": 2202 }, { "epoch": 2.377562028047465, "grad_norm": 3.4234535694122314, "learning_rate": 4.0448164925525987e-07, "loss": 0.4321536421775818, "step": 2204 }, { "epoch": 2.3797195253505934, "grad_norm": 0.3995194137096405, "learning_rate": 4.031247124996764e-07, "loss": 0.3386417329311371, "step": 2206 }, { "epoch": 2.381877022653722, "grad_norm": 1.4748889207839966, "learning_rate": 4.017717201154217e-07, "loss": 0.2493990957736969, "step": 2208 }, { "epoch": 2.3840345199568502, "grad_norm": 2.027784824371338, "learning_rate": 4.004226797605445e-07, "loss": 0.36100074648857117, "step": 2210 }, { "epoch": 2.3861920172599786, "grad_norm": 1.2897789478302002, "learning_rate": 3.990775990707237e-07, "loss": 0.23212602734565735, "step": 2212 }, { "epoch": 2.3883495145631066, "grad_norm": 1.4718191623687744, "learning_rate": 3.9773648565922634e-07, "loss": 0.17623895406723022, "step": 2214 }, { "epoch": 2.390507011866235, "grad_norm": 1.6537420749664307, "learning_rate": 3.963993471168643e-07, "loss": 0.3103001117706299, "step": 2216 }, { "epoch": 2.3926645091693635, "grad_norm": 3.3019044399261475, "learning_rate": 3.9506619101195196e-07, "loss": 0.3791100084781647, "step": 2218 }, { "epoch": 2.394822006472492, "grad_norm": 0.7170013785362244, "learning_rate": 3.9373702489026184e-07, "loss": 0.23801524937152863, "step": 2220 }, { "epoch": 2.3969795037756203, "grad_norm": 1.0052597522735596, "learning_rate": 3.9241185627498333e-07, "loss": 0.2777608036994934, "step": 2222 }, { "epoch": 2.3991370010787487, "grad_norm": 5.178793907165527, "learning_rate": 3.9109069266668e-07, "loss": 0.35337719321250916, "step": 2224 }, { "epoch": 2.401294498381877, "grad_norm": 1.5237584114074707, "learning_rate": 3.8977354154324586e-07, "loss": 0.3222746253013611, "step": 2226 }, { "epoch": 2.4034519956850056, "grad_norm": 2.202849864959717, "learning_rate": 3.884604103598647e-07, "loss": 0.2847940921783447, "step": 2228 }, { "epoch": 2.405609492988134, "grad_norm": 2.125887870788574, "learning_rate": 3.8715130654896623e-07, "loss": 0.3124774992465973, "step": 2230 }, { "epoch": 2.407766990291262, "grad_norm": 2.585139513015747, "learning_rate": 3.858462375201862e-07, "loss": 0.3422589898109436, "step": 2232 }, { "epoch": 2.4099244875943904, "grad_norm": 2.663825035095215, "learning_rate": 3.8454521066032214e-07, "loss": 0.2664566934108734, "step": 2234 }, { "epoch": 2.412081984897519, "grad_norm": 8.156516075134277, "learning_rate": 3.8324823333329263e-07, "loss": 0.2662767171859741, "step": 2236 }, { "epoch": 2.414239482200647, "grad_norm": 1.3628453016281128, "learning_rate": 3.819553128800962e-07, "loss": 0.3383438289165497, "step": 2238 }, { "epoch": 2.4163969795037756, "grad_norm": 1.212145447731018, "learning_rate": 3.806664566187686e-07, "loss": 0.3051017224788666, "step": 2240 }, { "epoch": 2.418554476806904, "grad_norm": 4.809718608856201, "learning_rate": 3.7938167184434206e-07, "loss": 0.2975846230983734, "step": 2242 }, { "epoch": 2.4207119741100325, "grad_norm": 1.992264986038208, "learning_rate": 3.781009658288036e-07, "loss": 0.2492162585258484, "step": 2244 }, { "epoch": 2.422869471413161, "grad_norm": 2.0577735900878906, "learning_rate": 3.768243458210549e-07, "loss": 0.35954225063323975, "step": 2246 }, { "epoch": 2.4250269687162893, "grad_norm": 1.2022833824157715, "learning_rate": 3.755518190468697e-07, "loss": 0.1975034475326538, "step": 2248 }, { "epoch": 2.4271844660194173, "grad_norm": 2.0728843212127686, "learning_rate": 3.7428339270885367e-07, "loss": 0.3646237254142761, "step": 2250 }, { "epoch": 2.4293419633225457, "grad_norm": 1.2893973588943481, "learning_rate": 3.73019073986405e-07, "loss": 0.22355937957763672, "step": 2252 }, { "epoch": 2.431499460625674, "grad_norm": 2.275219440460205, "learning_rate": 3.717588700356702e-07, "loss": 0.1818881779909134, "step": 2254 }, { "epoch": 2.4336569579288025, "grad_norm": 1.8378785848617554, "learning_rate": 3.7050278798950795e-07, "loss": 0.22650231420993805, "step": 2256 }, { "epoch": 2.435814455231931, "grad_norm": 2.7440717220306396, "learning_rate": 3.6925083495744534e-07, "loss": 0.44353553652763367, "step": 2258 }, { "epoch": 2.4379719525350594, "grad_norm": 1.588118076324463, "learning_rate": 3.6800301802563927e-07, "loss": 0.2522018551826477, "step": 2260 }, { "epoch": 2.440129449838188, "grad_norm": 1.7098896503448486, "learning_rate": 3.667593442568364e-07, "loss": 0.32094866037368774, "step": 2262 }, { "epoch": 2.4422869471413162, "grad_norm": 1.7896616458892822, "learning_rate": 3.6551982069033205e-07, "loss": 0.2640255093574524, "step": 2264 }, { "epoch": 2.4444444444444446, "grad_norm": 0.299568235874176, "learning_rate": 3.6428445434193136e-07, "loss": 0.14886681735515594, "step": 2266 }, { "epoch": 2.4466019417475726, "grad_norm": 1.7262334823608398, "learning_rate": 3.6305325220390905e-07, "loss": 0.26833122968673706, "step": 2268 }, { "epoch": 2.448759439050701, "grad_norm": 1.531208872795105, "learning_rate": 3.618262212449706e-07, "loss": 0.4644531011581421, "step": 2270 }, { "epoch": 2.4509169363538295, "grad_norm": 2.4541876316070557, "learning_rate": 3.606033684102121e-07, "loss": 0.21291311085224152, "step": 2272 }, { "epoch": 2.453074433656958, "grad_norm": 1.4597750902175903, "learning_rate": 3.5938470062108043e-07, "loss": 0.22411037981510162, "step": 2274 }, { "epoch": 2.4552319309600863, "grad_norm": 3.633920431137085, "learning_rate": 3.5817022477533585e-07, "loss": 0.23133979737758636, "step": 2276 }, { "epoch": 2.4573894282632147, "grad_norm": 1.5003464221954346, "learning_rate": 3.569599477470112e-07, "loss": 0.3434515595436096, "step": 2278 }, { "epoch": 2.459546925566343, "grad_norm": 2.270390033721924, "learning_rate": 3.5575387638637357e-07, "loss": 0.3822650611400604, "step": 2280 }, { "epoch": 2.4617044228694716, "grad_norm": 1.8612627983093262, "learning_rate": 3.545520175198858e-07, "loss": 0.2585938274860382, "step": 2282 }, { "epoch": 2.4638619201726, "grad_norm": 2.235671281814575, "learning_rate": 3.5335437795016823e-07, "loss": 0.28054654598236084, "step": 2284 }, { "epoch": 2.466019417475728, "grad_norm": 4.319032669067383, "learning_rate": 3.5216096445595884e-07, "loss": 0.4531608819961548, "step": 2286 }, { "epoch": 2.4681769147788564, "grad_norm": 1.4703388214111328, "learning_rate": 3.509717837920756e-07, "loss": 0.3540151119232178, "step": 2288 }, { "epoch": 2.470334412081985, "grad_norm": 2.039675712585449, "learning_rate": 3.497868426893793e-07, "loss": 0.13517698645591736, "step": 2290 }, { "epoch": 2.472491909385113, "grad_norm": 0.5410081148147583, "learning_rate": 3.486061478547337e-07, "loss": 0.02865418791770935, "step": 2292 }, { "epoch": 2.4746494066882416, "grad_norm": 1.2113001346588135, "learning_rate": 3.4742970597096834e-07, "loss": 0.33490967750549316, "step": 2294 }, { "epoch": 2.47680690399137, "grad_norm": 1.3965346813201904, "learning_rate": 3.462575236968406e-07, "loss": 0.18190385401248932, "step": 2296 }, { "epoch": 2.4789644012944985, "grad_norm": 3.908724069595337, "learning_rate": 3.4508960766699914e-07, "loss": 0.31986406445503235, "step": 2298 }, { "epoch": 2.481121898597627, "grad_norm": 1.823320746421814, "learning_rate": 3.4392596449194346e-07, "loss": 0.24571648240089417, "step": 2300 }, { "epoch": 2.4832793959007553, "grad_norm": 9.324934959411621, "learning_rate": 3.427666007579902e-07, "loss": 0.2971467077732086, "step": 2302 }, { "epoch": 2.4854368932038833, "grad_norm": 1.0990798473358154, "learning_rate": 3.416115230272333e-07, "loss": 0.3316362500190735, "step": 2304 }, { "epoch": 2.4875943905070117, "grad_norm": 0.9614824056625366, "learning_rate": 3.4046073783750726e-07, "loss": 0.21634887158870697, "step": 2306 }, { "epoch": 2.48975188781014, "grad_norm": 3.744295120239258, "learning_rate": 3.3931425170235083e-07, "loss": 0.3917967677116394, "step": 2308 }, { "epoch": 2.4919093851132685, "grad_norm": 2.0177054405212402, "learning_rate": 3.381720711109695e-07, "loss": 0.3639075458049774, "step": 2310 }, { "epoch": 2.494066882416397, "grad_norm": 3.873263359069824, "learning_rate": 3.3703420252819947e-07, "loss": 0.28831595182418823, "step": 2312 }, { "epoch": 2.4962243797195254, "grad_norm": 9.90953540802002, "learning_rate": 3.359006523944697e-07, "loss": 0.2942795753479004, "step": 2314 }, { "epoch": 2.498381877022654, "grad_norm": 1.6383076906204224, "learning_rate": 3.347714271257679e-07, "loss": 0.19500726461410522, "step": 2316 }, { "epoch": 2.500539374325782, "grad_norm": 1.960985779762268, "learning_rate": 3.3364653311360104e-07, "loss": 0.30468082427978516, "step": 2318 }, { "epoch": 2.5026968716289106, "grad_norm": 1.2923824787139893, "learning_rate": 3.325259767249617e-07, "loss": 0.20314782857894897, "step": 2320 }, { "epoch": 2.5048543689320386, "grad_norm": 1.4000043869018555, "learning_rate": 3.3140976430229136e-07, "loss": 0.34522709250450134, "step": 2322 }, { "epoch": 2.5070118662351675, "grad_norm": 1.8520127534866333, "learning_rate": 3.302979021634438e-07, "loss": 0.3963944911956787, "step": 2324 }, { "epoch": 2.5091693635382954, "grad_norm": 1.3927682638168335, "learning_rate": 3.2919039660164973e-07, "loss": 0.126472607254982, "step": 2326 }, { "epoch": 2.511326860841424, "grad_norm": 1.8297348022460938, "learning_rate": 3.2808725388548164e-07, "loss": 0.3507118821144104, "step": 2328 }, { "epoch": 2.5134843581445523, "grad_norm": 0.6373293399810791, "learning_rate": 3.269884802588181e-07, "loss": 0.15893447399139404, "step": 2330 }, { "epoch": 2.5156418554476807, "grad_norm": 0.42088356614112854, "learning_rate": 3.258940819408079e-07, "loss": 0.14911764860153198, "step": 2332 }, { "epoch": 2.517799352750809, "grad_norm": 2.131605863571167, "learning_rate": 3.248040651258352e-07, "loss": 0.2661122679710388, "step": 2334 }, { "epoch": 2.5199568500539375, "grad_norm": 7.058028697967529, "learning_rate": 3.2371843598348485e-07, "loss": 0.35104191303253174, "step": 2336 }, { "epoch": 2.522114347357066, "grad_norm": 12.030430793762207, "learning_rate": 3.2263720065850686e-07, "loss": 0.28974059224128723, "step": 2338 }, { "epoch": 2.524271844660194, "grad_norm": 2.7837140560150146, "learning_rate": 3.215603652707819e-07, "loss": 0.4368301033973694, "step": 2340 }, { "epoch": 2.526429341963323, "grad_norm": 7.616663455963135, "learning_rate": 3.2048793591528655e-07, "loss": 0.47825562953948975, "step": 2342 }, { "epoch": 2.528586839266451, "grad_norm": 3.9860897064208984, "learning_rate": 3.194199186620592e-07, "loss": 0.31387850642204285, "step": 2344 }, { "epoch": 2.530744336569579, "grad_norm": 1.596435546875, "learning_rate": 3.1835631955616505e-07, "loss": 0.3126782178878784, "step": 2346 }, { "epoch": 2.5329018338727076, "grad_norm": 1.315140724182129, "learning_rate": 3.172971446176621e-07, "loss": 0.26786503195762634, "step": 2348 }, { "epoch": 2.535059331175836, "grad_norm": 0.4910350441932678, "learning_rate": 3.16242399841568e-07, "loss": 0.1304475963115692, "step": 2350 }, { "epoch": 2.5372168284789645, "grad_norm": 1.8181979656219482, "learning_rate": 3.1519209119782435e-07, "loss": 0.26876750588417053, "step": 2352 }, { "epoch": 2.539374325782093, "grad_norm": 2.385711193084717, "learning_rate": 3.141462246312644e-07, "loss": 0.2904283106327057, "step": 2354 }, { "epoch": 2.5415318230852213, "grad_norm": 1.548781156539917, "learning_rate": 3.1310480606157864e-07, "loss": 0.2804209589958191, "step": 2356 }, { "epoch": 2.5436893203883493, "grad_norm": 2.7937028408050537, "learning_rate": 3.120678413832821e-07, "loss": 0.36265939474105835, "step": 2358 }, { "epoch": 2.545846817691478, "grad_norm": 1.6474692821502686, "learning_rate": 3.110353364656792e-07, "loss": 0.2979966700077057, "step": 2360 }, { "epoch": 2.548004314994606, "grad_norm": 1.5488511323928833, "learning_rate": 3.1000729715283306e-07, "loss": 0.37080666422843933, "step": 2362 }, { "epoch": 2.5501618122977345, "grad_norm": 2.0763769149780273, "learning_rate": 3.089837292635309e-07, "loss": 0.293621301651001, "step": 2364 }, { "epoch": 2.552319309600863, "grad_norm": 1.1936835050582886, "learning_rate": 3.079646385912502e-07, "loss": 0.1373947560787201, "step": 2366 }, { "epoch": 2.5544768069039914, "grad_norm": 1.4387550354003906, "learning_rate": 3.069500309041283e-07, "loss": 0.34591472148895264, "step": 2368 }, { "epoch": 2.55663430420712, "grad_norm": 36.577571868896484, "learning_rate": 3.05939911944928e-07, "loss": 0.6267740726470947, "step": 2370 }, { "epoch": 2.558791801510248, "grad_norm": 1.3116552829742432, "learning_rate": 3.049342874310053e-07, "loss": 0.25853827595710754, "step": 2372 }, { "epoch": 2.5609492988133766, "grad_norm": 1.2759634256362915, "learning_rate": 3.0393316305427743e-07, "loss": 0.29107633233070374, "step": 2374 }, { "epoch": 2.5631067961165046, "grad_norm": 2.6377315521240234, "learning_rate": 3.0293654448119094e-07, "loss": 0.3561844527721405, "step": 2376 }, { "epoch": 2.5652642934196335, "grad_norm": 1.450838565826416, "learning_rate": 3.0194443735268855e-07, "loss": 0.24433653056621552, "step": 2378 }, { "epoch": 2.5674217907227614, "grad_norm": 5.896989822387695, "learning_rate": 3.009568472841778e-07, "loss": 0.3096998929977417, "step": 2380 }, { "epoch": 2.56957928802589, "grad_norm": 1.285710096359253, "learning_rate": 2.999737798654999e-07, "loss": 0.23295409977436066, "step": 2382 }, { "epoch": 2.5717367853290183, "grad_norm": 1.9540512561798096, "learning_rate": 2.9899524066089715e-07, "loss": 0.3955782949924469, "step": 2384 }, { "epoch": 2.5738942826321467, "grad_norm": 1.3187015056610107, "learning_rate": 2.980212352089816e-07, "loss": 0.34291237592697144, "step": 2386 }, { "epoch": 2.576051779935275, "grad_norm": 35.879234313964844, "learning_rate": 2.9705176902270386e-07, "loss": 0.2686152160167694, "step": 2388 }, { "epoch": 2.5782092772384035, "grad_norm": 1.8663164377212524, "learning_rate": 2.960868475893224e-07, "loss": 0.20186705887317657, "step": 2390 }, { "epoch": 2.580366774541532, "grad_norm": 1.9428675174713135, "learning_rate": 2.951264763703719e-07, "loss": 0.2464224100112915, "step": 2392 }, { "epoch": 2.58252427184466, "grad_norm": 1.2760838270187378, "learning_rate": 2.941706608016317e-07, "loss": 0.2949107885360718, "step": 2394 }, { "epoch": 2.584681769147789, "grad_norm": 1.605370044708252, "learning_rate": 2.9321940629309705e-07, "loss": 0.2963062822818756, "step": 2396 }, { "epoch": 2.5868392664509168, "grad_norm": 1.2159507274627686, "learning_rate": 2.9227271822894615e-07, "loss": 0.25476306676864624, "step": 2398 }, { "epoch": 2.588996763754045, "grad_norm": 1.5449872016906738, "learning_rate": 2.913306019675114e-07, "loss": 0.1481795310974121, "step": 2400 }, { "epoch": 2.5911542610571736, "grad_norm": 1.4837470054626465, "learning_rate": 2.9039306284124764e-07, "loss": 0.1671726405620575, "step": 2402 }, { "epoch": 2.593311758360302, "grad_norm": 2.013652801513672, "learning_rate": 2.8946010615670397e-07, "loss": 0.3186720609664917, "step": 2404 }, { "epoch": 2.5954692556634305, "grad_norm": 1.3679430484771729, "learning_rate": 2.8853173719449153e-07, "loss": 0.19645805656909943, "step": 2406 }, { "epoch": 2.597626752966559, "grad_norm": 4.073277473449707, "learning_rate": 2.8760796120925455e-07, "loss": 0.27875351905822754, "step": 2408 }, { "epoch": 2.5997842502696873, "grad_norm": 2.3851447105407715, "learning_rate": 2.8668878342964165e-07, "loss": 0.3042440414428711, "step": 2410 }, { "epoch": 2.6019417475728153, "grad_norm": 3.0329368114471436, "learning_rate": 2.8577420905827356e-07, "loss": 0.40673866868019104, "step": 2412 }, { "epoch": 2.604099244875944, "grad_norm": 1.5875539779663086, "learning_rate": 2.848642432717171e-07, "loss": 0.3267652988433838, "step": 2414 }, { "epoch": 2.606256742179072, "grad_norm": 1.2932881116867065, "learning_rate": 2.8395889122045293e-07, "loss": 0.3058151602745056, "step": 2416 }, { "epoch": 2.6084142394822005, "grad_norm": 1.7162359952926636, "learning_rate": 2.8305815802884807e-07, "loss": 0.3365314304828644, "step": 2418 }, { "epoch": 2.610571736785329, "grad_norm": 2.6204607486724854, "learning_rate": 2.8216204879512613e-07, "loss": 0.26252228021621704, "step": 2420 }, { "epoch": 2.6127292340884574, "grad_norm": 1.4677083492279053, "learning_rate": 2.8127056859133914e-07, "loss": 0.22944192588329315, "step": 2422 }, { "epoch": 2.614886731391586, "grad_norm": 1.4243353605270386, "learning_rate": 2.803837224633385e-07, "loss": 0.30490928888320923, "step": 2424 }, { "epoch": 2.617044228694714, "grad_norm": 1.6455210447311401, "learning_rate": 2.795015154307454e-07, "loss": 0.3725619614124298, "step": 2426 }, { "epoch": 2.6192017259978426, "grad_norm": 1.263656497001648, "learning_rate": 2.786239524869247e-07, "loss": 0.38048383593559265, "step": 2428 }, { "epoch": 2.6213592233009706, "grad_norm": 4.27044677734375, "learning_rate": 2.7775103859895443e-07, "loss": 0.306596577167511, "step": 2430 }, { "epoch": 2.6235167206040995, "grad_norm": 1.5108482837677002, "learning_rate": 2.7688277870759877e-07, "loss": 0.25143009424209595, "step": 2432 }, { "epoch": 2.6256742179072274, "grad_norm": 2.3310492038726807, "learning_rate": 2.7601917772728e-07, "loss": 0.38734516501426697, "step": 2434 }, { "epoch": 2.627831715210356, "grad_norm": 3.2000060081481934, "learning_rate": 2.7516024054605076e-07, "loss": 0.3311081528663635, "step": 2436 }, { "epoch": 2.6299892125134843, "grad_norm": 1.907240390777588, "learning_rate": 2.743059720255658e-07, "loss": 0.1861996203660965, "step": 2438 }, { "epoch": 2.6321467098166127, "grad_norm": 1.7378534078598022, "learning_rate": 2.73456377001055e-07, "loss": 0.21612344682216644, "step": 2440 }, { "epoch": 2.634304207119741, "grad_norm": 1.9995208978652954, "learning_rate": 2.726114602812962e-07, "loss": 0.20262135565280914, "step": 2442 }, { "epoch": 2.6364617044228695, "grad_norm": 2.0192410945892334, "learning_rate": 2.7177122664858727e-07, "loss": 0.3542102575302124, "step": 2444 }, { "epoch": 2.638619201725998, "grad_norm": 1.935210943222046, "learning_rate": 2.709356808587195e-07, "loss": 0.39216798543930054, "step": 2446 }, { "epoch": 2.6407766990291264, "grad_norm": 1.6480642557144165, "learning_rate": 2.7010482764095047e-07, "loss": 0.12209905683994293, "step": 2448 }, { "epoch": 2.642934196332255, "grad_norm": 7.414170265197754, "learning_rate": 2.6927867169797805e-07, "loss": 0.5208877325057983, "step": 2450 }, { "epoch": 2.6450916936353828, "grad_norm": 1.8177531957626343, "learning_rate": 2.6845721770591236e-07, "loss": 0.4026768207550049, "step": 2452 }, { "epoch": 2.647249190938511, "grad_norm": 7.202023506164551, "learning_rate": 2.676404703142503e-07, "loss": 0.25339025259017944, "step": 2454 }, { "epoch": 2.6494066882416396, "grad_norm": 1.1629081964492798, "learning_rate": 2.6682843414584954e-07, "loss": 0.27695736289024353, "step": 2456 }, { "epoch": 2.651564185544768, "grad_norm": 2.3341569900512695, "learning_rate": 2.660211137969013e-07, "loss": 0.1916518658399582, "step": 2458 }, { "epoch": 2.6537216828478964, "grad_norm": 3.8498494625091553, "learning_rate": 2.6521851383690486e-07, "loss": 0.28546687960624695, "step": 2460 }, { "epoch": 2.655879180151025, "grad_norm": 1.4487419128417969, "learning_rate": 2.6442063880864183e-07, "loss": 0.2577356994152069, "step": 2462 }, { "epoch": 2.6580366774541533, "grad_norm": 1.4185736179351807, "learning_rate": 2.636274932281508e-07, "loss": 0.335868775844574, "step": 2464 }, { "epoch": 2.6601941747572817, "grad_norm": 0.31347620487213135, "learning_rate": 2.628390815847005e-07, "loss": 0.08725874125957489, "step": 2466 }, { "epoch": 2.66235167206041, "grad_norm": 1.61336088180542, "learning_rate": 2.6205540834076545e-07, "loss": 0.38296324014663696, "step": 2468 }, { "epoch": 2.664509169363538, "grad_norm": 1.5005100965499878, "learning_rate": 2.6127647793200105e-07, "loss": 0.29919686913490295, "step": 2470 }, { "epoch": 2.6666666666666665, "grad_norm": 1.2096229791641235, "learning_rate": 2.6050229476721666e-07, "loss": 0.20811551809310913, "step": 2472 }, { "epoch": 2.668824163969795, "grad_norm": 1.0160880088806152, "learning_rate": 2.59732863228353e-07, "loss": 0.2570361793041229, "step": 2474 }, { "epoch": 2.6709816612729234, "grad_norm": 1.900524377822876, "learning_rate": 2.589681876704557e-07, "loss": 0.2715557813644409, "step": 2476 }, { "epoch": 2.6731391585760518, "grad_norm": 4.474449157714844, "learning_rate": 2.58208272421651e-07, "loss": 0.3477630913257599, "step": 2478 }, { "epoch": 2.67529665587918, "grad_norm": 1.686800241470337, "learning_rate": 2.574531217831218e-07, "loss": 0.3386651277542114, "step": 2480 }, { "epoch": 2.6774541531823086, "grad_norm": 1.3463099002838135, "learning_rate": 2.567027400290826e-07, "loss": 0.24490870535373688, "step": 2482 }, { "epoch": 2.679611650485437, "grad_norm": 1.4641351699829102, "learning_rate": 2.5595713140675575e-07, "loss": 0.2602543234825134, "step": 2484 }, { "epoch": 2.6817691477885655, "grad_norm": 1.584945797920227, "learning_rate": 2.55216300136347e-07, "loss": 0.2942560911178589, "step": 2486 }, { "epoch": 2.6839266450916934, "grad_norm": 1.3279131650924683, "learning_rate": 2.544802504110226e-07, "loss": 0.2843012809753418, "step": 2488 }, { "epoch": 2.686084142394822, "grad_norm": 1.5675629377365112, "learning_rate": 2.537489863968842e-07, "loss": 0.3618108034133911, "step": 2490 }, { "epoch": 2.6882416396979503, "grad_norm": 1.386384129524231, "learning_rate": 2.530225122329459e-07, "loss": 0.18842831254005432, "step": 2492 }, { "epoch": 2.6903991370010787, "grad_norm": 0.44938626885414124, "learning_rate": 2.5230083203111163e-07, "loss": 0.010065621696412563, "step": 2494 }, { "epoch": 2.692556634304207, "grad_norm": 1.9967701435089111, "learning_rate": 2.5158394987615014e-07, "loss": 0.3003666400909424, "step": 2496 }, { "epoch": 2.6947141316073355, "grad_norm": 0.5517582893371582, "learning_rate": 2.5087186982567345e-07, "loss": 0.1282682716846466, "step": 2498 }, { "epoch": 2.696871628910464, "grad_norm": 1.3890687227249146, "learning_rate": 2.5016459591011287e-07, "loss": 0.3012073338031769, "step": 2500 }, { "epoch": 2.6990291262135924, "grad_norm": 2.243579387664795, "learning_rate": 2.494621321326972e-07, "loss": 0.3187774121761322, "step": 2502 }, { "epoch": 2.701186623516721, "grad_norm": 1.2543731927871704, "learning_rate": 2.487644824694288e-07, "loss": 0.2931416630744934, "step": 2504 }, { "epoch": 2.7033441208198488, "grad_norm": 2.056020975112915, "learning_rate": 2.48071650869062e-07, "loss": 0.24611467123031616, "step": 2506 }, { "epoch": 2.705501618122977, "grad_norm": 2.098752498626709, "learning_rate": 2.473836412530809e-07, "loss": 0.3165457248687744, "step": 2508 }, { "epoch": 2.7076591154261056, "grad_norm": 4.687522888183594, "learning_rate": 2.46700457515676e-07, "loss": 0.4136981666088104, "step": 2510 }, { "epoch": 2.709816612729234, "grad_norm": 0.5573480725288391, "learning_rate": 2.460221035237235e-07, "loss": 0.15423323214054108, "step": 2512 }, { "epoch": 2.7119741100323624, "grad_norm": 1.1807475090026855, "learning_rate": 2.453485831167625e-07, "loss": 0.2989809811115265, "step": 2514 }, { "epoch": 2.714131607335491, "grad_norm": 1.6326533555984497, "learning_rate": 2.446799001069742e-07, "loss": 0.3671968877315521, "step": 2516 }, { "epoch": 2.7162891046386193, "grad_norm": 4.470088958740234, "learning_rate": 2.440160582791589e-07, "loss": 0.3751377463340759, "step": 2518 }, { "epoch": 2.7184466019417477, "grad_norm": 0.9408198595046997, "learning_rate": 2.43357061390716e-07, "loss": 0.3237053155899048, "step": 2520 }, { "epoch": 2.720604099244876, "grad_norm": 1.8276516199111938, "learning_rate": 2.42702913171622e-07, "loss": 0.3124433755874634, "step": 2522 }, { "epoch": 2.722761596548004, "grad_norm": 1.5803215503692627, "learning_rate": 2.420536173244094e-07, "loss": 0.2791770100593567, "step": 2524 }, { "epoch": 2.724919093851133, "grad_norm": 3.284719705581665, "learning_rate": 2.414091775241462e-07, "loss": 0.3442307114601135, "step": 2526 }, { "epoch": 2.727076591154261, "grad_norm": 1.687919020652771, "learning_rate": 2.4076959741841445e-07, "loss": 0.3351602852344513, "step": 2528 }, { "epoch": 2.7292340884573894, "grad_norm": 7.2448225021362305, "learning_rate": 2.4013488062728993e-07, "loss": 0.3161589205265045, "step": 2530 }, { "epoch": 2.7313915857605178, "grad_norm": 4.134527206420898, "learning_rate": 2.395050307433219e-07, "loss": 0.36085984110832214, "step": 2532 }, { "epoch": 2.733549083063646, "grad_norm": 1.2739371061325073, "learning_rate": 2.3888005133151255e-07, "loss": 0.22625665366649628, "step": 2534 }, { "epoch": 2.7357065803667746, "grad_norm": 1.8671566247940063, "learning_rate": 2.3825994592929645e-07, "loss": 0.2694007158279419, "step": 2536 }, { "epoch": 2.737864077669903, "grad_norm": 7.57783842086792, "learning_rate": 2.3764471804652095e-07, "loss": 0.18972235918045044, "step": 2538 }, { "epoch": 2.7400215749730314, "grad_norm": 2.3178181648254395, "learning_rate": 2.370343711654267e-07, "loss": 0.277940571308136, "step": 2540 }, { "epoch": 2.7421790722761594, "grad_norm": 1.989964246749878, "learning_rate": 2.36428908740627e-07, "loss": 0.22704048454761505, "step": 2542 }, { "epoch": 2.7443365695792883, "grad_norm": 1.57944655418396, "learning_rate": 2.358283341990889e-07, "loss": 0.25424429774284363, "step": 2544 }, { "epoch": 2.7464940668824163, "grad_norm": 2.715576410293579, "learning_rate": 2.352326509401134e-07, "loss": 0.2091311663389206, "step": 2546 }, { "epoch": 2.7486515641855447, "grad_norm": 3.7704293727874756, "learning_rate": 2.3464186233531696e-07, "loss": 0.316684752702713, "step": 2548 }, { "epoch": 2.750809061488673, "grad_norm": 0.6256927847862244, "learning_rate": 2.3405597172861135e-07, "loss": 0.2244507223367691, "step": 2550 }, { "epoch": 2.7529665587918015, "grad_norm": 1.8245450258255005, "learning_rate": 2.3347498243618558e-07, "loss": 0.21601910889148712, "step": 2552 }, { "epoch": 2.75512405609493, "grad_norm": 2.7671761512756348, "learning_rate": 2.3289889774648675e-07, "loss": 0.25035250186920166, "step": 2554 }, { "epoch": 2.7572815533980584, "grad_norm": 1.276296854019165, "learning_rate": 2.3232772092020148e-07, "loss": 0.18391655385494232, "step": 2556 }, { "epoch": 2.759439050701187, "grad_norm": 3.1530673503875732, "learning_rate": 2.3176145519023742e-07, "loss": 0.2945748567581177, "step": 2558 }, { "epoch": 2.7615965480043148, "grad_norm": 1.5141795873641968, "learning_rate": 2.312001037617051e-07, "loss": 0.3175848424434662, "step": 2560 }, { "epoch": 2.7637540453074436, "grad_norm": 1.762587547302246, "learning_rate": 2.3064366981189995e-07, "loss": 0.2632935643196106, "step": 2562 }, { "epoch": 2.7659115426105716, "grad_norm": 2.4396347999572754, "learning_rate": 2.3009215649028332e-07, "loss": 0.12333346903324127, "step": 2564 }, { "epoch": 2.7680690399137, "grad_norm": 0.8734754323959351, "learning_rate": 2.295455669184662e-07, "loss": 0.1719101369380951, "step": 2566 }, { "epoch": 2.7702265372168284, "grad_norm": 2.0594780445098877, "learning_rate": 2.2900390419019047e-07, "loss": 0.24180670082569122, "step": 2568 }, { "epoch": 2.772384034519957, "grad_norm": 3.8657302856445312, "learning_rate": 2.2846717137131139e-07, "loss": 0.17427459359169006, "step": 2570 }, { "epoch": 2.7745415318230853, "grad_norm": 1.2864545583724976, "learning_rate": 2.2793537149978097e-07, "loss": 0.3185139298439026, "step": 2572 }, { "epoch": 2.7766990291262137, "grad_norm": 1.2937431335449219, "learning_rate": 2.2740850758563e-07, "loss": 0.25411853194236755, "step": 2574 }, { "epoch": 2.778856526429342, "grad_norm": 2.3176355361938477, "learning_rate": 2.2688658261095177e-07, "loss": 0.20836421847343445, "step": 2576 }, { "epoch": 2.78101402373247, "grad_norm": 2.0247695446014404, "learning_rate": 2.2636959952988402e-07, "loss": 0.27753064036369324, "step": 2578 }, { "epoch": 2.783171521035599, "grad_norm": 1.718490481376648, "learning_rate": 2.2585756126859373e-07, "loss": 0.26367393136024475, "step": 2580 }, { "epoch": 2.785329018338727, "grad_norm": 2.086444854736328, "learning_rate": 2.2535047072525968e-07, "loss": 0.2552420198917389, "step": 2582 }, { "epoch": 2.7874865156418553, "grad_norm": 2.691962957382202, "learning_rate": 2.2484833077005534e-07, "loss": 0.38996651768684387, "step": 2584 }, { "epoch": 2.7896440129449838, "grad_norm": 3.1365230083465576, "learning_rate": 2.2435114424513468e-07, "loss": 0.2881295680999756, "step": 2586 }, { "epoch": 2.791801510248112, "grad_norm": 0.678428053855896, "learning_rate": 2.23858913964614e-07, "loss": 0.3442489802837372, "step": 2588 }, { "epoch": 2.7939590075512406, "grad_norm": 1.9768662452697754, "learning_rate": 2.233716427145571e-07, "loss": 0.3336244225502014, "step": 2590 }, { "epoch": 2.796116504854369, "grad_norm": 7.05756139755249, "learning_rate": 2.2288933325295919e-07, "loss": 0.3653881251811981, "step": 2592 }, { "epoch": 2.7982740021574974, "grad_norm": 1.4996511936187744, "learning_rate": 2.224119883097315e-07, "loss": 0.337455153465271, "step": 2594 }, { "epoch": 2.8004314994606254, "grad_norm": 1.5251402854919434, "learning_rate": 2.2193961058668565e-07, "loss": 0.24892055988311768, "step": 2596 }, { "epoch": 2.8025889967637543, "grad_norm": 2.474886655807495, "learning_rate": 2.2147220275751817e-07, "loss": 0.38033241033554077, "step": 2598 }, { "epoch": 2.8047464940668823, "grad_norm": 2.1626007556915283, "learning_rate": 2.2100976746779575e-07, "loss": 0.2779306471347809, "step": 2600 }, { "epoch": 2.8069039913700107, "grad_norm": 1.5766234397888184, "learning_rate": 2.2055230733494034e-07, "loss": 0.198373481631279, "step": 2602 }, { "epoch": 2.809061488673139, "grad_norm": 1.3127539157867432, "learning_rate": 2.2009982494821354e-07, "loss": 0.2616628110408783, "step": 2604 }, { "epoch": 2.8112189859762675, "grad_norm": 1.8110085725784302, "learning_rate": 2.1965232286870293e-07, "loss": 0.34928035736083984, "step": 2606 }, { "epoch": 2.813376483279396, "grad_norm": 0.9856870770454407, "learning_rate": 2.1920980362930693e-07, "loss": 0.24830693006515503, "step": 2608 }, { "epoch": 2.8155339805825244, "grad_norm": 1.0019290447235107, "learning_rate": 2.1877226973472092e-07, "loss": 0.37566351890563965, "step": 2610 }, { "epoch": 2.8176914778856528, "grad_norm": 1.683439016342163, "learning_rate": 2.1833972366142252e-07, "loss": 0.20518970489501953, "step": 2612 }, { "epoch": 2.8198489751887807, "grad_norm": 1.2993359565734863, "learning_rate": 2.1791216785765812e-07, "loss": 0.3496171832084656, "step": 2614 }, { "epoch": 2.8220064724919096, "grad_norm": 1.8535475730895996, "learning_rate": 2.1748960474342858e-07, "loss": 0.40503692626953125, "step": 2616 }, { "epoch": 2.8241639697950376, "grad_norm": 1.230737328529358, "learning_rate": 2.1707203671047588e-07, "loss": 0.3467937111854553, "step": 2618 }, { "epoch": 2.826321467098166, "grad_norm": 2.092404365539551, "learning_rate": 2.166594661222692e-07, "loss": 0.45293277502059937, "step": 2620 }, { "epoch": 2.8284789644012944, "grad_norm": 1.2861336469650269, "learning_rate": 2.162518953139921e-07, "loss": 0.2429104447364807, "step": 2622 }, { "epoch": 2.830636461704423, "grad_norm": 1.7713249921798706, "learning_rate": 2.1584932659252883e-07, "loss": 0.31055137515068054, "step": 2624 }, { "epoch": 2.8327939590075513, "grad_norm": 2.2935972213745117, "learning_rate": 2.1545176223645118e-07, "loss": 0.44860854744911194, "step": 2626 }, { "epoch": 2.8349514563106797, "grad_norm": 3.7632486820220947, "learning_rate": 2.1505920449600637e-07, "loss": 0.45059871673583984, "step": 2628 }, { "epoch": 2.837108953613808, "grad_norm": 1.9739702939987183, "learning_rate": 2.146716555931031e-07, "loss": 0.32334843277931213, "step": 2630 }, { "epoch": 2.839266450916936, "grad_norm": 1.8699138164520264, "learning_rate": 2.1428911772130022e-07, "loss": 0.37822097539901733, "step": 2632 }, { "epoch": 2.841423948220065, "grad_norm": 1.3838216066360474, "learning_rate": 2.1391159304579338e-07, "loss": 0.2415277361869812, "step": 2634 }, { "epoch": 2.843581445523193, "grad_norm": 2.178896427154541, "learning_rate": 2.1353908370340319e-07, "loss": 0.36013925075531006, "step": 2636 }, { "epoch": 2.8457389428263213, "grad_norm": 1.6107887029647827, "learning_rate": 2.131715918025631e-07, "loss": 0.28840532898902893, "step": 2638 }, { "epoch": 2.8478964401294498, "grad_norm": 1.2073341608047485, "learning_rate": 2.1280911942330754e-07, "loss": 0.24024561047554016, "step": 2640 }, { "epoch": 2.850053937432578, "grad_norm": 1.346824049949646, "learning_rate": 2.1245166861725987e-07, "loss": 0.29280197620391846, "step": 2642 }, { "epoch": 2.8522114347357066, "grad_norm": 2.1668944358825684, "learning_rate": 2.1209924140762103e-07, "loss": 0.27317503094673157, "step": 2644 }, { "epoch": 2.854368932038835, "grad_norm": 1.5950590372085571, "learning_rate": 2.1175183978915794e-07, "loss": 0.16236615180969238, "step": 2646 }, { "epoch": 2.8565264293419634, "grad_norm": 1.4013762474060059, "learning_rate": 2.1140946572819222e-07, "loss": 0.23140932619571686, "step": 2648 }, { "epoch": 2.858683926645092, "grad_norm": 1.3808273077011108, "learning_rate": 2.1107212116258926e-07, "loss": 0.34648364782333374, "step": 2650 }, { "epoch": 2.8608414239482203, "grad_norm": 1.66391921043396, "learning_rate": 2.107398080017468e-07, "loss": 0.3459605872631073, "step": 2652 }, { "epoch": 2.8629989212513482, "grad_norm": 3.6172289848327637, "learning_rate": 2.1041252812658484e-07, "loss": 0.25366389751434326, "step": 2654 }, { "epoch": 2.8651564185544767, "grad_norm": 1.0681893825531006, "learning_rate": 2.100902833895342e-07, "loss": 0.185197651386261, "step": 2656 }, { "epoch": 2.867313915857605, "grad_norm": 1.549970030784607, "learning_rate": 2.0977307561452663e-07, "loss": 0.1989063322544098, "step": 2658 }, { "epoch": 2.8694714131607335, "grad_norm": 1.3811312913894653, "learning_rate": 2.09460906596984e-07, "loss": 0.31181615591049194, "step": 2660 }, { "epoch": 2.871628910463862, "grad_norm": 1.6068974733352661, "learning_rate": 2.091537781038089e-07, "loss": 0.30436015129089355, "step": 2662 }, { "epoch": 2.8737864077669903, "grad_norm": 2.1774463653564453, "learning_rate": 2.0885169187337344e-07, "loss": 0.1961633861064911, "step": 2664 }, { "epoch": 2.8759439050701188, "grad_norm": 1.3629629611968994, "learning_rate": 2.0855464961551068e-07, "loss": 0.2554187774658203, "step": 2666 }, { "epoch": 2.878101402373247, "grad_norm": 1.1737473011016846, "learning_rate": 2.0826265301150424e-07, "loss": 0.2499612420797348, "step": 2668 }, { "epoch": 2.8802588996763756, "grad_norm": 0.43503567576408386, "learning_rate": 2.0797570371407868e-07, "loss": 0.11031116545200348, "step": 2670 }, { "epoch": 2.8824163969795036, "grad_norm": 1.2711269855499268, "learning_rate": 2.0769380334739064e-07, "loss": 0.27530673146247864, "step": 2672 }, { "epoch": 2.884573894282632, "grad_norm": 1.6386080980300903, "learning_rate": 2.0741695350701957e-07, "loss": 0.21418559551239014, "step": 2674 }, { "epoch": 2.8867313915857604, "grad_norm": 0.6615068912506104, "learning_rate": 2.0714515575995788e-07, "loss": 0.3122788667678833, "step": 2676 }, { "epoch": 2.888888888888889, "grad_norm": 1.6554388999938965, "learning_rate": 2.068784116446034e-07, "loss": 0.2509201467037201, "step": 2678 }, { "epoch": 2.8910463861920173, "grad_norm": 1.535337209701538, "learning_rate": 2.0661672267074972e-07, "loss": 0.2228378802537918, "step": 2680 }, { "epoch": 2.8932038834951457, "grad_norm": 1.8591066598892212, "learning_rate": 2.0636009031957781e-07, "loss": 0.4586015045642853, "step": 2682 }, { "epoch": 2.895361380798274, "grad_norm": 1.2504751682281494, "learning_rate": 2.0610851604364787e-07, "loss": 0.21972203254699707, "step": 2684 }, { "epoch": 2.8975188781014025, "grad_norm": 1.314433217048645, "learning_rate": 2.0586200126689092e-07, "loss": 0.30095550417900085, "step": 2686 }, { "epoch": 2.899676375404531, "grad_norm": 1.6293613910675049, "learning_rate": 2.0562054738460098e-07, "loss": 0.12489507347345352, "step": 2688 }, { "epoch": 2.901833872707659, "grad_norm": 2.388120651245117, "learning_rate": 2.0538415576342665e-07, "loss": 0.2403588593006134, "step": 2690 }, { "epoch": 2.9039913700107873, "grad_norm": 1.676954984664917, "learning_rate": 2.0515282774136402e-07, "loss": 0.24414768815040588, "step": 2692 }, { "epoch": 2.9061488673139158, "grad_norm": 2.731567621231079, "learning_rate": 2.0492656462774877e-07, "loss": 0.3925679326057434, "step": 2694 }, { "epoch": 2.908306364617044, "grad_norm": 1.193949818611145, "learning_rate": 2.047053677032484e-07, "loss": 0.31919193267822266, "step": 2696 }, { "epoch": 2.9104638619201726, "grad_norm": 1.6734915971755981, "learning_rate": 2.0448923821985597e-07, "loss": 0.2700918912887573, "step": 2698 }, { "epoch": 2.912621359223301, "grad_norm": 2.165048837661743, "learning_rate": 2.0427817740088204e-07, "loss": 0.09998652338981628, "step": 2700 }, { "epoch": 2.9147788565264294, "grad_norm": 1.4805729389190674, "learning_rate": 2.0407218644094798e-07, "loss": 0.26490524411201477, "step": 2702 }, { "epoch": 2.916936353829558, "grad_norm": 1.9566199779510498, "learning_rate": 2.0387126650597966e-07, "loss": 0.37730756402015686, "step": 2704 }, { "epoch": 2.9190938511326863, "grad_norm": 1.5146074295043945, "learning_rate": 2.036754187332004e-07, "loss": 0.2974068224430084, "step": 2706 }, { "epoch": 2.9212513484358142, "grad_norm": 1.5399764776229858, "learning_rate": 2.034846442311247e-07, "loss": 0.2960386276245117, "step": 2708 }, { "epoch": 2.9234088457389427, "grad_norm": 1.4276615381240845, "learning_rate": 2.0329894407955186e-07, "loss": 0.24433766305446625, "step": 2710 }, { "epoch": 2.925566343042071, "grad_norm": 1.12831711769104, "learning_rate": 2.0311831932956003e-07, "loss": 0.194054514169693, "step": 2712 }, { "epoch": 2.9277238403451995, "grad_norm": 4.204019546508789, "learning_rate": 2.0294277100350006e-07, "loss": 0.3664979338645935, "step": 2714 }, { "epoch": 2.929881337648328, "grad_norm": 1.073026180267334, "learning_rate": 2.0277230009498994e-07, "loss": 0.2648014426231384, "step": 2716 }, { "epoch": 2.9320388349514563, "grad_norm": 2.1484158039093018, "learning_rate": 2.026069075689089e-07, "loss": 0.28026407957077026, "step": 2718 }, { "epoch": 2.9341963322545848, "grad_norm": 2.006321907043457, "learning_rate": 2.0244659436139232e-07, "loss": 0.451577752828598, "step": 2720 }, { "epoch": 2.936353829557713, "grad_norm": 0.9612744450569153, "learning_rate": 2.0229136137982607e-07, "loss": 0.2079283595085144, "step": 2722 }, { "epoch": 2.9385113268608416, "grad_norm": 1.127065896987915, "learning_rate": 2.021412095028416e-07, "loss": 0.29667848348617554, "step": 2724 }, { "epoch": 2.9406688241639696, "grad_norm": 0.9780626893043518, "learning_rate": 2.019961395803108e-07, "loss": 0.15733566880226135, "step": 2726 }, { "epoch": 2.9428263214670984, "grad_norm": 1.398445725440979, "learning_rate": 2.0185615243334142e-07, "loss": 0.3436535894870758, "step": 2728 }, { "epoch": 2.9449838187702264, "grad_norm": 22.650222778320312, "learning_rate": 2.0172124885427215e-07, "loss": 0.23946398496627808, "step": 2730 }, { "epoch": 2.947141316073355, "grad_norm": 1.695454478263855, "learning_rate": 2.0159142960666828e-07, "loss": 0.22133874893188477, "step": 2732 }, { "epoch": 2.9492988133764833, "grad_norm": 3.043394088745117, "learning_rate": 2.0146669542531755e-07, "loss": 0.23711824417114258, "step": 2734 }, { "epoch": 2.9514563106796117, "grad_norm": 0.8506256937980652, "learning_rate": 2.0134704701622555e-07, "loss": 0.15003100037574768, "step": 2736 }, { "epoch": 2.95361380798274, "grad_norm": 1.9315284490585327, "learning_rate": 2.0123248505661205e-07, "loss": 0.27814292907714844, "step": 2738 }, { "epoch": 2.9557713052858685, "grad_norm": 1.3882677555084229, "learning_rate": 2.011230101949073e-07, "loss": 0.23976953327655792, "step": 2740 }, { "epoch": 2.957928802588997, "grad_norm": 2.189103126525879, "learning_rate": 2.0101862305074788e-07, "loss": 0.30090874433517456, "step": 2742 }, { "epoch": 2.960086299892125, "grad_norm": 1.476915955543518, "learning_rate": 2.0091932421497359e-07, "loss": 0.2663874328136444, "step": 2744 }, { "epoch": 2.9622437971952538, "grad_norm": 1.6229114532470703, "learning_rate": 2.0082511424962407e-07, "loss": 0.2835708558559418, "step": 2746 }, { "epoch": 2.9644012944983817, "grad_norm": 1.7014294862747192, "learning_rate": 2.0073599368793536e-07, "loss": 0.3245730698108673, "step": 2748 }, { "epoch": 2.96655879180151, "grad_norm": 1.3853977918624878, "learning_rate": 2.0065196303433735e-07, "loss": 0.41964831948280334, "step": 2750 }, { "epoch": 2.9687162891046386, "grad_norm": 1.847307801246643, "learning_rate": 2.0057302276445018e-07, "loss": 0.33610066771507263, "step": 2752 }, { "epoch": 2.970873786407767, "grad_norm": 1.3295162916183472, "learning_rate": 2.0049917332508245e-07, "loss": 0.32951587438583374, "step": 2754 }, { "epoch": 2.9730312837108954, "grad_norm": 1.515709638595581, "learning_rate": 2.0043041513422793e-07, "loss": 0.1392497420310974, "step": 2756 }, { "epoch": 2.975188781014024, "grad_norm": 1.3852156400680542, "learning_rate": 2.0036674858106364e-07, "loss": 0.23723219335079193, "step": 2758 }, { "epoch": 2.9773462783171523, "grad_norm": 0.7373172044754028, "learning_rate": 2.0030817402594758e-07, "loss": 0.0664176344871521, "step": 2760 }, { "epoch": 2.9795037756202802, "grad_norm": 0.5779815912246704, "learning_rate": 2.0025469180041652e-07, "loss": 0.19533909857273102, "step": 2762 }, { "epoch": 2.981661272923409, "grad_norm": 1.1505489349365234, "learning_rate": 2.0020630220718412e-07, "loss": 0.40156224370002747, "step": 2764 }, { "epoch": 2.983818770226537, "grad_norm": 1.542429804801941, "learning_rate": 2.0016300552013962e-07, "loss": 0.11573772132396698, "step": 2766 }, { "epoch": 2.9859762675296655, "grad_norm": 4.137073040008545, "learning_rate": 2.0012480198434574e-07, "loss": 0.4161064624786377, "step": 2768 }, { "epoch": 2.988133764832794, "grad_norm": 3.1401453018188477, "learning_rate": 2.0009169181603766e-07, "loss": 0.26901060342788696, "step": 2770 }, { "epoch": 2.9902912621359223, "grad_norm": 1.872072696685791, "learning_rate": 2.0006367520262163e-07, "loss": 0.36560726165771484, "step": 2772 }, { "epoch": 2.9924487594390508, "grad_norm": 1.6279696226119995, "learning_rate": 2.0004075230267392e-07, "loss": 0.22248563170433044, "step": 2774 }, { "epoch": 2.994606256742179, "grad_norm": 2.087898015975952, "learning_rate": 2.0002292324594007e-07, "loss": 0.2563750445842743, "step": 2776 }, { "epoch": 2.9967637540453076, "grad_norm": 2.9592933654785156, "learning_rate": 2.000101881333341e-07, "loss": 0.12756453454494476, "step": 2778 }, { "epoch": 2.9989212513484356, "grad_norm": 2.473083734512329, "learning_rate": 2.0000254703693767e-07, "loss": 0.1695672571659088, "step": 2780 }, { "epoch": 3.0, "step": 2781, "total_flos": 3.284111394515778e+18, "train_loss": 0.46147939157918366, "train_runtime": 26036.604, "train_samples_per_second": 1.709, "train_steps_per_second": 0.107 } ], "logging_steps": 2, "max_steps": 2781, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 99999, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.284111394515778e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }