{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.7351589781290204, "eval_steps": 500, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00014703179562580409, "grad_norm": 1.7258896827697754, "learning_rate": 0.0, "loss": 1.6229, "step": 1 }, { "epoch": 0.00029406359125160817, "grad_norm": 1.2875570058822632, "learning_rate": 4e-05, "loss": 1.293, "step": 2 }, { "epoch": 0.00044109538687741223, "grad_norm": 1.1806529760360718, "learning_rate": 8e-05, "loss": 1.3713, "step": 3 }, { "epoch": 0.0005881271825032163, "grad_norm": 1.0785037279129028, "learning_rate": 0.00012, "loss": 1.9108, "step": 4 }, { "epoch": 0.0007351589781290203, "grad_norm": 0.589216411113739, "learning_rate": 0.00016, "loss": 1.6566, "step": 5 }, { "epoch": 0.0008821907737548245, "grad_norm": 0.7524330615997314, "learning_rate": 0.0002, "loss": 1.2765, "step": 6 }, { "epoch": 0.0010292225693806286, "grad_norm": 0.5628565549850464, "learning_rate": 0.00019555555555555556, "loss": 1.4254, "step": 7 }, { "epoch": 0.0011762543650064327, "grad_norm": 0.7264350652694702, "learning_rate": 0.00019111111111111114, "loss": 1.7523, "step": 8 }, { "epoch": 0.0013232861606322368, "grad_norm": 0.8216027617454529, "learning_rate": 0.0001866666666666667, "loss": 1.3511, "step": 9 }, { "epoch": 0.0014703179562580407, "grad_norm": 0.6742819547653198, "learning_rate": 0.00018222222222222224, "loss": 1.6546, "step": 10 }, { "epoch": 0.0016173497518838448, "grad_norm": 0.48201262950897217, "learning_rate": 0.00017777777777777779, "loss": 1.8844, "step": 11 }, { "epoch": 0.001764381547509649, "grad_norm": 0.5743035674095154, "learning_rate": 0.00017333333333333334, "loss": 1.2342, "step": 12 }, { "epoch": 0.001911413343135453, "grad_norm": 0.7406991124153137, "learning_rate": 0.00016888888888888889, "loss": 1.4613, "step": 13 }, { "epoch": 0.002058445138761257, "grad_norm": 0.6519092917442322, "learning_rate": 0.00016444444444444444, "loss": 1.498, "step": 14 }, { "epoch": 0.0022054769343870613, "grad_norm": 0.4849397540092468, "learning_rate": 0.00016, "loss": 1.1592, "step": 15 }, { "epoch": 0.0023525087300128654, "grad_norm": 0.6403281688690186, "learning_rate": 0.00015555555555555556, "loss": 1.3766, "step": 16 }, { "epoch": 0.0024995405256386695, "grad_norm": 0.8054800629615784, "learning_rate": 0.0001511111111111111, "loss": 1.2416, "step": 17 }, { "epoch": 0.0026465723212644736, "grad_norm": 0.768284261226654, "learning_rate": 0.00014666666666666666, "loss": 1.0412, "step": 18 }, { "epoch": 0.0027936041168902777, "grad_norm": 0.5003041625022888, "learning_rate": 0.00014222222222222224, "loss": 1.7483, "step": 19 }, { "epoch": 0.0029406359125160814, "grad_norm": 0.5596277117729187, "learning_rate": 0.0001377777777777778, "loss": 1.8583, "step": 20 }, { "epoch": 0.0030876677081418855, "grad_norm": 0.41995805501937866, "learning_rate": 0.00013333333333333334, "loss": 1.713, "step": 21 }, { "epoch": 0.0032346995037676896, "grad_norm": 0.5945447087287903, "learning_rate": 0.00012888888888888892, "loss": 1.3443, "step": 22 }, { "epoch": 0.0033817312993934937, "grad_norm": 0.6081192493438721, "learning_rate": 0.00012444444444444444, "loss": 1.4327, "step": 23 }, { "epoch": 0.003528763095019298, "grad_norm": 0.7344409227371216, "learning_rate": 0.00012, "loss": 1.3386, "step": 24 }, { "epoch": 0.003675794890645102, "grad_norm": 0.5514310598373413, "learning_rate": 0.00011555555555555555, "loss": 1.1797, "step": 25 }, { "epoch": 0.003822826686270906, "grad_norm": 0.7426251173019409, "learning_rate": 0.00011111111111111112, "loss": 1.7542, "step": 26 }, { "epoch": 0.00396985848189671, "grad_norm": 0.4866659641265869, "learning_rate": 0.00010666666666666667, "loss": 1.8658, "step": 27 }, { "epoch": 0.004116890277522514, "grad_norm": 0.629629909992218, "learning_rate": 0.00010222222222222222, "loss": 1.7218, "step": 28 }, { "epoch": 0.004263922073148318, "grad_norm": 0.5347703099250793, "learning_rate": 9.777777777777778e-05, "loss": 1.1007, "step": 29 }, { "epoch": 0.0044109538687741225, "grad_norm": 0.5639967918395996, "learning_rate": 9.333333333333334e-05, "loss": 1.4406, "step": 30 }, { "epoch": 0.004557985664399926, "grad_norm": 0.7082501649856567, "learning_rate": 8.888888888888889e-05, "loss": 1.4903, "step": 31 }, { "epoch": 0.004705017460025731, "grad_norm": 0.7058646082878113, "learning_rate": 8.444444444444444e-05, "loss": 1.5086, "step": 32 }, { "epoch": 0.0048520492556515344, "grad_norm": 0.6934836506843567, "learning_rate": 8e-05, "loss": 1.2166, "step": 33 }, { "epoch": 0.004999081051277339, "grad_norm": 0.6959455609321594, "learning_rate": 7.555555555555556e-05, "loss": 1.5596, "step": 34 }, { "epoch": 0.005146112846903143, "grad_norm": 0.5489625334739685, "learning_rate": 7.111111111111112e-05, "loss": 1.7426, "step": 35 }, { "epoch": 0.005293144642528947, "grad_norm": 0.8417682647705078, "learning_rate": 6.666666666666667e-05, "loss": 1.1871, "step": 36 }, { "epoch": 0.005440176438154751, "grad_norm": 0.4764294922351837, "learning_rate": 6.222222222222222e-05, "loss": 1.5354, "step": 37 }, { "epoch": 0.0055872082337805554, "grad_norm": 0.7552003860473633, "learning_rate": 5.7777777777777776e-05, "loss": 1.1869, "step": 38 }, { "epoch": 0.005734240029406359, "grad_norm": 0.694106936454773, "learning_rate": 5.333333333333333e-05, "loss": 1.5685, "step": 39 }, { "epoch": 0.005881271825032163, "grad_norm": 0.5134473443031311, "learning_rate": 4.888888888888889e-05, "loss": 1.5438, "step": 40 }, { "epoch": 0.006028303620657967, "grad_norm": 0.6107639074325562, "learning_rate": 4.4444444444444447e-05, "loss": 1.7796, "step": 41 }, { "epoch": 0.006175335416283771, "grad_norm": 0.7616034150123596, "learning_rate": 4e-05, "loss": 1.6502, "step": 42 }, { "epoch": 0.006322367211909576, "grad_norm": 0.7029539346694946, "learning_rate": 3.555555555555556e-05, "loss": 1.5426, "step": 43 }, { "epoch": 0.006469399007535379, "grad_norm": 0.6094809770584106, "learning_rate": 3.111111111111111e-05, "loss": 1.379, "step": 44 }, { "epoch": 0.006616430803161184, "grad_norm": 0.6784330606460571, "learning_rate": 2.6666666666666667e-05, "loss": 1.4356, "step": 45 }, { "epoch": 0.0067634625987869875, "grad_norm": 0.4609280824661255, "learning_rate": 2.2222222222222223e-05, "loss": 1.6887, "step": 46 }, { "epoch": 0.006910494394412792, "grad_norm": 0.560640811920166, "learning_rate": 1.777777777777778e-05, "loss": 1.7814, "step": 47 }, { "epoch": 0.007057526190038596, "grad_norm": 0.7606922388076782, "learning_rate": 1.3333333333333333e-05, "loss": 1.0658, "step": 48 }, { "epoch": 0.0072045579856644, "grad_norm": 0.7615998983383179, "learning_rate": 8.88888888888889e-06, "loss": 1.2465, "step": 49 }, { "epoch": 0.007351589781290204, "grad_norm": 0.4773058295249939, "learning_rate": 4.444444444444445e-06, "loss": 1.2389, "step": 50 }, { "epoch": 0.0074986215769160085, "grad_norm": 0.5604999661445618, "learning_rate": 0.0, "loss": 1.7905, "step": 51 }, { "epoch": 0.007645653372541812, "grad_norm": 0.5312463045120239, "learning_rate": 0.0001969282136894825, "loss": 1.5059, "step": 52 }, { "epoch": 0.007792685168167616, "grad_norm": 0.719408392906189, "learning_rate": 0.00019686143572621037, "loss": 1.1692, "step": 53 }, { "epoch": 0.00793971696379342, "grad_norm": 0.7367963194847107, "learning_rate": 0.00019679465776293825, "loss": 1.3044, "step": 54 }, { "epoch": 0.008086748759419225, "grad_norm": 0.7660955190658569, "learning_rate": 0.00019672787979966612, "loss": 1.1891, "step": 55 }, { "epoch": 0.008233780555045029, "grad_norm": 0.6879117488861084, "learning_rate": 0.000196661101836394, "loss": 1.169, "step": 56 }, { "epoch": 0.008380812350670832, "grad_norm": 0.7095165848731995, "learning_rate": 0.0001965943238731219, "loss": 1.396, "step": 57 }, { "epoch": 0.008527844146296636, "grad_norm": 0.6931248903274536, "learning_rate": 0.00019652754590984977, "loss": 1.3106, "step": 58 }, { "epoch": 0.008674875941922441, "grad_norm": 0.7816098928451538, "learning_rate": 0.00019646076794657764, "loss": 1.3725, "step": 59 }, { "epoch": 0.008821907737548245, "grad_norm": 0.7630704045295715, "learning_rate": 0.0001963939899833055, "loss": 1.3281, "step": 60 }, { "epoch": 0.008968939533174049, "grad_norm": 0.7464380264282227, "learning_rate": 0.00019632721202003339, "loss": 1.294, "step": 61 }, { "epoch": 0.009115971328799852, "grad_norm": 0.7003430724143982, "learning_rate": 0.00019626043405676129, "loss": 1.3425, "step": 62 }, { "epoch": 0.009263003124425658, "grad_norm": 0.6538450717926025, "learning_rate": 0.00019619365609348916, "loss": 1.3189, "step": 63 }, { "epoch": 0.009410034920051462, "grad_norm": 0.7302049994468689, "learning_rate": 0.00019612687813021703, "loss": 1.2458, "step": 64 }, { "epoch": 0.009557066715677265, "grad_norm": 0.7778714299201965, "learning_rate": 0.0001960601001669449, "loss": 1.1513, "step": 65 }, { "epoch": 0.009704098511303069, "grad_norm": 0.8034526109695435, "learning_rate": 0.00019599332220367278, "loss": 1.3217, "step": 66 }, { "epoch": 0.009851130306928873, "grad_norm": 0.49932941794395447, "learning_rate": 0.00019592654424040068, "loss": 1.6182, "step": 67 }, { "epoch": 0.009998162102554678, "grad_norm": 0.6123740673065186, "learning_rate": 0.00019585976627712855, "loss": 1.7013, "step": 68 }, { "epoch": 0.010145193898180482, "grad_norm": 0.6953626275062561, "learning_rate": 0.00019579298831385645, "loss": 1.231, "step": 69 }, { "epoch": 0.010292225693806285, "grad_norm": 0.5491873025894165, "learning_rate": 0.00019572621035058433, "loss": 1.199, "step": 70 }, { "epoch": 0.010439257489432089, "grad_norm": 0.6250758171081543, "learning_rate": 0.0001956594323873122, "loss": 1.358, "step": 71 }, { "epoch": 0.010586289285057894, "grad_norm": 0.7545821070671082, "learning_rate": 0.00019559265442404007, "loss": 1.2576, "step": 72 }, { "epoch": 0.010733321080683698, "grad_norm": 0.6840016841888428, "learning_rate": 0.00019552587646076797, "loss": 1.324, "step": 73 }, { "epoch": 0.010880352876309502, "grad_norm": 0.5197773575782776, "learning_rate": 0.00019545909849749584, "loss": 1.3146, "step": 74 }, { "epoch": 0.011027384671935305, "grad_norm": 0.4652192294597626, "learning_rate": 0.00019539232053422372, "loss": 1.735, "step": 75 }, { "epoch": 0.011174416467561111, "grad_norm": 0.4333137571811676, "learning_rate": 0.0001953255425709516, "loss": 1.6144, "step": 76 }, { "epoch": 0.011321448263186915, "grad_norm": 0.7623845934867859, "learning_rate": 0.00019525876460767946, "loss": 1.3889, "step": 77 }, { "epoch": 0.011468480058812718, "grad_norm": 0.693976104259491, "learning_rate": 0.00019519198664440736, "loss": 1.3276, "step": 78 }, { "epoch": 0.011615511854438522, "grad_norm": 0.6791786551475525, "learning_rate": 0.00019512520868113524, "loss": 1.6312, "step": 79 }, { "epoch": 0.011762543650064326, "grad_norm": 0.6400951147079468, "learning_rate": 0.0001950584307178631, "loss": 1.3499, "step": 80 }, { "epoch": 0.011909575445690131, "grad_norm": 0.6780531406402588, "learning_rate": 0.00019499165275459098, "loss": 1.115, "step": 81 }, { "epoch": 0.012056607241315935, "grad_norm": 0.7627335786819458, "learning_rate": 0.00019492487479131886, "loss": 1.3998, "step": 82 }, { "epoch": 0.012203639036941738, "grad_norm": 0.7756338119506836, "learning_rate": 0.00019485809682804673, "loss": 1.3275, "step": 83 }, { "epoch": 0.012350670832567542, "grad_norm": 0.5861336588859558, "learning_rate": 0.00019479131886477463, "loss": 1.9574, "step": 84 }, { "epoch": 0.012497702628193347, "grad_norm": 0.7777627110481262, "learning_rate": 0.0001947245409015025, "loss": 1.4408, "step": 85 }, { "epoch": 0.012644734423819151, "grad_norm": 0.6070252656936646, "learning_rate": 0.0001946577629382304, "loss": 1.5281, "step": 86 }, { "epoch": 0.012791766219444955, "grad_norm": 0.5790751576423645, "learning_rate": 0.00019459098497495828, "loss": 1.1907, "step": 87 }, { "epoch": 0.012938798015070758, "grad_norm": 0.5126588940620422, "learning_rate": 0.00019452420701168615, "loss": 1.4943, "step": 88 }, { "epoch": 0.013085829810696564, "grad_norm": 0.8037551641464233, "learning_rate": 0.00019445742904841405, "loss": 1.2655, "step": 89 }, { "epoch": 0.013232861606322368, "grad_norm": 0.6323136687278748, "learning_rate": 0.00019439065108514192, "loss": 1.4879, "step": 90 }, { "epoch": 0.013379893401948171, "grad_norm": 0.5359807014465332, "learning_rate": 0.0001943238731218698, "loss": 1.1416, "step": 91 }, { "epoch": 0.013526925197573975, "grad_norm": 0.648858904838562, "learning_rate": 0.00019425709515859767, "loss": 1.6013, "step": 92 }, { "epoch": 0.013673956993199779, "grad_norm": 0.6926876902580261, "learning_rate": 0.00019419031719532554, "loss": 1.2115, "step": 93 }, { "epoch": 0.013820988788825584, "grad_norm": 0.6817899346351624, "learning_rate": 0.00019412353923205344, "loss": 1.3929, "step": 94 }, { "epoch": 0.013968020584451388, "grad_norm": 0.6183105707168579, "learning_rate": 0.00019405676126878132, "loss": 1.8325, "step": 95 }, { "epoch": 0.014115052380077191, "grad_norm": 0.5492458343505859, "learning_rate": 0.0001939899833055092, "loss": 1.5649, "step": 96 }, { "epoch": 0.014262084175702995, "grad_norm": 0.7207104563713074, "learning_rate": 0.00019392320534223706, "loss": 1.32, "step": 97 }, { "epoch": 0.0144091159713288, "grad_norm": 0.4980703294277191, "learning_rate": 0.00019385642737896494, "loss": 1.0993, "step": 98 }, { "epoch": 0.014556147766954604, "grad_norm": 0.6439472436904907, "learning_rate": 0.0001937896494156928, "loss": 1.4087, "step": 99 }, { "epoch": 0.014703179562580408, "grad_norm": 0.6431806683540344, "learning_rate": 0.0001937228714524207, "loss": 1.7431, "step": 100 }, { "epoch": 0.014850211358206212, "grad_norm": 0.6421759128570557, "learning_rate": 0.00019365609348914858, "loss": 1.4901, "step": 101 }, { "epoch": 0.014997243153832017, "grad_norm": 0.5457899570465088, "learning_rate": 0.00019358931552587646, "loss": 1.7132, "step": 102 }, { "epoch": 0.01514427494945782, "grad_norm": 0.659453809261322, "learning_rate": 0.00019352253756260436, "loss": 1.2335, "step": 103 }, { "epoch": 0.015291306745083624, "grad_norm": 0.5520448088645935, "learning_rate": 0.00019345575959933223, "loss": 1.3766, "step": 104 }, { "epoch": 0.015438338540709428, "grad_norm": 0.6720184087753296, "learning_rate": 0.00019338898163606013, "loss": 1.3576, "step": 105 }, { "epoch": 0.015585370336335232, "grad_norm": 0.7560919523239136, "learning_rate": 0.000193322203672788, "loss": 1.3078, "step": 106 }, { "epoch": 0.015732402131961035, "grad_norm": 0.5984731912612915, "learning_rate": 0.00019325542570951588, "loss": 1.4802, "step": 107 }, { "epoch": 0.01587943392758684, "grad_norm": 0.823121190071106, "learning_rate": 0.00019318864774624375, "loss": 1.2747, "step": 108 }, { "epoch": 0.016026465723212646, "grad_norm": 0.8534990549087524, "learning_rate": 0.00019312186978297162, "loss": 1.1535, "step": 109 }, { "epoch": 0.01617349751883845, "grad_norm": 0.5633559226989746, "learning_rate": 0.00019305509181969952, "loss": 1.6752, "step": 110 }, { "epoch": 0.016320529314464254, "grad_norm": 0.6144236326217651, "learning_rate": 0.0001929883138564274, "loss": 1.7058, "step": 111 }, { "epoch": 0.016467561110090057, "grad_norm": 0.5533897280693054, "learning_rate": 0.00019292153589315527, "loss": 1.2716, "step": 112 }, { "epoch": 0.01661459290571586, "grad_norm": 0.7452273964881897, "learning_rate": 0.00019285475792988314, "loss": 1.1707, "step": 113 }, { "epoch": 0.016761624701341665, "grad_norm": 0.4465287923812866, "learning_rate": 0.00019278797996661101, "loss": 1.5021, "step": 114 }, { "epoch": 0.016908656496967468, "grad_norm": 0.4912537634372711, "learning_rate": 0.0001927212020033389, "loss": 1.6607, "step": 115 }, { "epoch": 0.017055688292593272, "grad_norm": 0.5073690414428711, "learning_rate": 0.0001926544240400668, "loss": 1.3443, "step": 116 }, { "epoch": 0.01720272008821908, "grad_norm": 0.5637887716293335, "learning_rate": 0.00019258764607679466, "loss": 1.5631, "step": 117 }, { "epoch": 0.017349751883844883, "grad_norm": 0.6547496318817139, "learning_rate": 0.00019252086811352253, "loss": 1.4114, "step": 118 }, { "epoch": 0.017496783679470686, "grad_norm": 0.6922225952148438, "learning_rate": 0.0001924540901502504, "loss": 1.7613, "step": 119 }, { "epoch": 0.01764381547509649, "grad_norm": 0.7733402252197266, "learning_rate": 0.0001923873121869783, "loss": 1.137, "step": 120 }, { "epoch": 0.017790847270722294, "grad_norm": 0.5699784755706787, "learning_rate": 0.00019232053422370618, "loss": 1.3615, "step": 121 }, { "epoch": 0.017937879066348097, "grad_norm": 0.5915058255195618, "learning_rate": 0.00019225375626043408, "loss": 1.4088, "step": 122 }, { "epoch": 0.0180849108619739, "grad_norm": 0.743571400642395, "learning_rate": 0.00019218697829716195, "loss": 1.0054, "step": 123 }, { "epoch": 0.018231942657599705, "grad_norm": 0.6367533802986145, "learning_rate": 0.00019212020033388983, "loss": 1.1425, "step": 124 }, { "epoch": 0.01837897445322551, "grad_norm": 0.4887382686138153, "learning_rate": 0.0001920534223706177, "loss": 1.327, "step": 125 }, { "epoch": 0.018526006248851316, "grad_norm": 0.48104327917099, "learning_rate": 0.0001919866444073456, "loss": 1.6652, "step": 126 }, { "epoch": 0.01867303804447712, "grad_norm": 0.6146672368049622, "learning_rate": 0.00019191986644407347, "loss": 1.5408, "step": 127 }, { "epoch": 0.018820069840102923, "grad_norm": 0.5802967548370361, "learning_rate": 0.00019185308848080135, "loss": 1.147, "step": 128 }, { "epoch": 0.018967101635728727, "grad_norm": 0.4917391538619995, "learning_rate": 0.00019178631051752922, "loss": 1.4103, "step": 129 }, { "epoch": 0.01911413343135453, "grad_norm": 0.7169671058654785, "learning_rate": 0.0001917195325542571, "loss": 1.3954, "step": 130 }, { "epoch": 0.019261165226980334, "grad_norm": 0.6534145474433899, "learning_rate": 0.00019165275459098497, "loss": 1.113, "step": 131 }, { "epoch": 0.019408197022606138, "grad_norm": 0.589989423751831, "learning_rate": 0.00019158597662771287, "loss": 1.5682, "step": 132 }, { "epoch": 0.01955522881823194, "grad_norm": 0.5871070623397827, "learning_rate": 0.00019151919866444074, "loss": 1.3837, "step": 133 }, { "epoch": 0.019702260613857745, "grad_norm": 0.5129367113113403, "learning_rate": 0.0001914524207011686, "loss": 1.6626, "step": 134 }, { "epoch": 0.019849292409483552, "grad_norm": 0.5821259021759033, "learning_rate": 0.0001913856427378965, "loss": 1.1617, "step": 135 }, { "epoch": 0.019996324205109356, "grad_norm": 0.6629579067230225, "learning_rate": 0.0001913188647746244, "loss": 1.4072, "step": 136 }, { "epoch": 0.02014335600073516, "grad_norm": 0.6667262315750122, "learning_rate": 0.00019125208681135226, "loss": 1.2727, "step": 137 }, { "epoch": 0.020290387796360963, "grad_norm": 0.6690664291381836, "learning_rate": 0.00019118530884808016, "loss": 1.238, "step": 138 }, { "epoch": 0.020437419591986767, "grad_norm": 0.5745186805725098, "learning_rate": 0.00019111853088480803, "loss": 1.6258, "step": 139 }, { "epoch": 0.02058445138761257, "grad_norm": 0.7720161080360413, "learning_rate": 0.0001910517529215359, "loss": 1.3723, "step": 140 }, { "epoch": 0.020731483183238374, "grad_norm": 0.7668603658676147, "learning_rate": 0.00019098497495826378, "loss": 1.3717, "step": 141 }, { "epoch": 0.020878514978864178, "grad_norm": 0.6401954889297485, "learning_rate": 0.00019091819699499168, "loss": 1.5481, "step": 142 }, { "epoch": 0.021025546774489985, "grad_norm": 0.5298405289649963, "learning_rate": 0.00019085141903171955, "loss": 1.2448, "step": 143 }, { "epoch": 0.02117257857011579, "grad_norm": 0.5826001763343811, "learning_rate": 0.00019078464106844743, "loss": 1.2388, "step": 144 }, { "epoch": 0.021319610365741593, "grad_norm": 0.6427043080329895, "learning_rate": 0.0001907178631051753, "loss": 1.5923, "step": 145 }, { "epoch": 0.021466642161367396, "grad_norm": 0.5886897444725037, "learning_rate": 0.00019065108514190317, "loss": 1.3037, "step": 146 }, { "epoch": 0.0216136739569932, "grad_norm": 0.6355546116828918, "learning_rate": 0.00019058430717863107, "loss": 1.4288, "step": 147 }, { "epoch": 0.021760705752619004, "grad_norm": 0.6302939057350159, "learning_rate": 0.00019051752921535895, "loss": 1.2263, "step": 148 }, { "epoch": 0.021907737548244807, "grad_norm": 0.6269422173500061, "learning_rate": 0.00019045075125208682, "loss": 1.3918, "step": 149 }, { "epoch": 0.02205476934387061, "grad_norm": 0.7319702506065369, "learning_rate": 0.0001903839732888147, "loss": 1.105, "step": 150 }, { "epoch": 0.022201801139496415, "grad_norm": 0.6304236650466919, "learning_rate": 0.00019031719532554257, "loss": 1.3691, "step": 151 }, { "epoch": 0.022348832935122222, "grad_norm": 0.5159934759140015, "learning_rate": 0.00019025041736227044, "loss": 1.234, "step": 152 }, { "epoch": 0.022495864730748025, "grad_norm": 0.6090654134750366, "learning_rate": 0.00019018363939899834, "loss": 1.2624, "step": 153 }, { "epoch": 0.02264289652637383, "grad_norm": 0.5876306891441345, "learning_rate": 0.0001901168614357262, "loss": 1.6018, "step": 154 }, { "epoch": 0.022789928321999633, "grad_norm": 0.5715771913528442, "learning_rate": 0.0001900500834724541, "loss": 1.6971, "step": 155 }, { "epoch": 0.022936960117625436, "grad_norm": 0.45397239923477173, "learning_rate": 0.00018998330550918199, "loss": 1.8734, "step": 156 }, { "epoch": 0.02308399191325124, "grad_norm": 0.43336284160614014, "learning_rate": 0.00018991652754590986, "loss": 1.7395, "step": 157 }, { "epoch": 0.023231023708877044, "grad_norm": 0.4989621639251709, "learning_rate": 0.00018984974958263776, "loss": 1.8132, "step": 158 }, { "epoch": 0.023378055504502848, "grad_norm": 0.5289480090141296, "learning_rate": 0.00018978297161936563, "loss": 1.3789, "step": 159 }, { "epoch": 0.02352508730012865, "grad_norm": 0.6862265467643738, "learning_rate": 0.0001897161936560935, "loss": 1.298, "step": 160 }, { "epoch": 0.02367211909575446, "grad_norm": 0.5551686882972717, "learning_rate": 0.00018964941569282138, "loss": 1.4486, "step": 161 }, { "epoch": 0.023819150891380262, "grad_norm": 0.6883800029754639, "learning_rate": 0.00018958263772954925, "loss": 0.9535, "step": 162 }, { "epoch": 0.023966182687006066, "grad_norm": 0.48476099967956543, "learning_rate": 0.00018951585976627715, "loss": 1.2439, "step": 163 }, { "epoch": 0.02411321448263187, "grad_norm": 0.5294777750968933, "learning_rate": 0.00018944908180300502, "loss": 1.4204, "step": 164 }, { "epoch": 0.024260246278257673, "grad_norm": 0.5384734869003296, "learning_rate": 0.0001893823038397329, "loss": 1.3798, "step": 165 }, { "epoch": 0.024407278073883477, "grad_norm": 0.5317145586013794, "learning_rate": 0.00018931552587646077, "loss": 1.3735, "step": 166 }, { "epoch": 0.02455430986950928, "grad_norm": 0.6459450721740723, "learning_rate": 0.00018924874791318864, "loss": 1.1118, "step": 167 }, { "epoch": 0.024701341665135084, "grad_norm": 0.5570773482322693, "learning_rate": 0.00018918196994991652, "loss": 1.4296, "step": 168 }, { "epoch": 0.02484837346076089, "grad_norm": 0.7163698673248291, "learning_rate": 0.00018911519198664442, "loss": 1.167, "step": 169 }, { "epoch": 0.024995405256386695, "grad_norm": 0.6477950215339661, "learning_rate": 0.0001890484140233723, "loss": 1.4011, "step": 170 }, { "epoch": 0.0251424370520125, "grad_norm": 0.44774386286735535, "learning_rate": 0.00018898163606010016, "loss": 1.5681, "step": 171 }, { "epoch": 0.025289468847638302, "grad_norm": 0.6403729319572449, "learning_rate": 0.00018891485809682806, "loss": 1.55, "step": 172 }, { "epoch": 0.025436500643264106, "grad_norm": 0.7885447144508362, "learning_rate": 0.00018884808013355594, "loss": 1.037, "step": 173 }, { "epoch": 0.02558353243888991, "grad_norm": 0.4331233501434326, "learning_rate": 0.00018878130217028384, "loss": 1.3681, "step": 174 }, { "epoch": 0.025730564234515713, "grad_norm": 0.4383181631565094, "learning_rate": 0.0001887145242070117, "loss": 1.5162, "step": 175 }, { "epoch": 0.025877596030141517, "grad_norm": 0.6594043970108032, "learning_rate": 0.00018864774624373958, "loss": 1.4231, "step": 176 }, { "epoch": 0.02602462782576732, "grad_norm": 0.6030335426330566, "learning_rate": 0.00018858096828046746, "loss": 1.3048, "step": 177 }, { "epoch": 0.026171659621393128, "grad_norm": 0.5374215245246887, "learning_rate": 0.00018851419031719533, "loss": 1.4877, "step": 178 }, { "epoch": 0.02631869141701893, "grad_norm": 0.43993502855300903, "learning_rate": 0.00018844741235392323, "loss": 1.2635, "step": 179 }, { "epoch": 0.026465723212644735, "grad_norm": 0.8226630687713623, "learning_rate": 0.0001883806343906511, "loss": 1.1588, "step": 180 }, { "epoch": 0.02661275500827054, "grad_norm": 0.6660661697387695, "learning_rate": 0.00018831385642737898, "loss": 1.2644, "step": 181 }, { "epoch": 0.026759786803896343, "grad_norm": 0.4528328478336334, "learning_rate": 0.00018824707846410685, "loss": 1.142, "step": 182 }, { "epoch": 0.026906818599522146, "grad_norm": 1.0442190170288086, "learning_rate": 0.00018818030050083472, "loss": 1.2908, "step": 183 }, { "epoch": 0.02705385039514795, "grad_norm": 0.6550003886222839, "learning_rate": 0.0001881135225375626, "loss": 1.4087, "step": 184 }, { "epoch": 0.027200882190773754, "grad_norm": 0.8150153756141663, "learning_rate": 0.0001880467445742905, "loss": 1.2673, "step": 185 }, { "epoch": 0.027347913986399557, "grad_norm": 0.6543192267417908, "learning_rate": 0.00018797996661101837, "loss": 1.2909, "step": 186 }, { "epoch": 0.027494945782025364, "grad_norm": 0.5765683650970459, "learning_rate": 0.00018791318864774624, "loss": 1.6012, "step": 187 }, { "epoch": 0.027641977577651168, "grad_norm": 0.6456362009048462, "learning_rate": 0.00018784641068447412, "loss": 1.157, "step": 188 }, { "epoch": 0.027789009373276972, "grad_norm": 0.7525225877761841, "learning_rate": 0.00018777963272120202, "loss": 1.5516, "step": 189 }, { "epoch": 0.027936041168902775, "grad_norm": 0.5114544630050659, "learning_rate": 0.0001877128547579299, "loss": 1.338, "step": 190 }, { "epoch": 0.02808307296452858, "grad_norm": 0.6324225664138794, "learning_rate": 0.0001876460767946578, "loss": 1.3978, "step": 191 }, { "epoch": 0.028230104760154383, "grad_norm": 0.5649280548095703, "learning_rate": 0.00018757929883138566, "loss": 1.4375, "step": 192 }, { "epoch": 0.028377136555780186, "grad_norm": 0.6387972831726074, "learning_rate": 0.00018751252086811354, "loss": 1.1978, "step": 193 }, { "epoch": 0.02852416835140599, "grad_norm": 0.6068618297576904, "learning_rate": 0.0001874457429048414, "loss": 1.3414, "step": 194 }, { "epoch": 0.028671200147031797, "grad_norm": 0.6169670224189758, "learning_rate": 0.0001873789649415693, "loss": 1.225, "step": 195 }, { "epoch": 0.0288182319426576, "grad_norm": 0.6766074895858765, "learning_rate": 0.00018731218697829718, "loss": 1.4629, "step": 196 }, { "epoch": 0.028965263738283405, "grad_norm": 0.5975071787834167, "learning_rate": 0.00018724540901502506, "loss": 1.4079, "step": 197 }, { "epoch": 0.02911229553390921, "grad_norm": 0.6850729584693909, "learning_rate": 0.00018717863105175293, "loss": 1.2511, "step": 198 }, { "epoch": 0.029259327329535012, "grad_norm": 0.6574251651763916, "learning_rate": 0.0001871118530884808, "loss": 1.2539, "step": 199 }, { "epoch": 0.029406359125160816, "grad_norm": 0.4810081720352173, "learning_rate": 0.00018704507512520868, "loss": 1.4736, "step": 200 }, { "epoch": 0.02955339092078662, "grad_norm": 0.5275496244430542, "learning_rate": 0.00018697829716193658, "loss": 1.4353, "step": 201 }, { "epoch": 0.029700422716412423, "grad_norm": 0.5854094624519348, "learning_rate": 0.00018691151919866445, "loss": 1.2817, "step": 202 }, { "epoch": 0.029847454512038227, "grad_norm": 0.6764337420463562, "learning_rate": 0.00018684474123539232, "loss": 1.3159, "step": 203 }, { "epoch": 0.029994486307664034, "grad_norm": 0.5491044521331787, "learning_rate": 0.0001867779632721202, "loss": 1.4164, "step": 204 }, { "epoch": 0.030141518103289838, "grad_norm": 0.5337619185447693, "learning_rate": 0.00018671118530884807, "loss": 1.6589, "step": 205 }, { "epoch": 0.03028854989891564, "grad_norm": 0.7253937721252441, "learning_rate": 0.00018664440734557597, "loss": 1.3394, "step": 206 }, { "epoch": 0.030435581694541445, "grad_norm": 0.569826066493988, "learning_rate": 0.00018657762938230384, "loss": 1.8183, "step": 207 }, { "epoch": 0.03058261349016725, "grad_norm": 0.664283275604248, "learning_rate": 0.00018651085141903174, "loss": 1.2228, "step": 208 }, { "epoch": 0.030729645285793052, "grad_norm": 0.8307971954345703, "learning_rate": 0.00018644407345575962, "loss": 0.9359, "step": 209 }, { "epoch": 0.030876677081418856, "grad_norm": 0.5159261226654053, "learning_rate": 0.0001863772954924875, "loss": 1.5459, "step": 210 }, { "epoch": 0.03102370887704466, "grad_norm": 0.508254885673523, "learning_rate": 0.0001863105175292154, "loss": 1.2704, "step": 211 }, { "epoch": 0.031170740672670463, "grad_norm": 0.7121203541755676, "learning_rate": 0.00018624373956594326, "loss": 1.5574, "step": 212 }, { "epoch": 0.03131777246829627, "grad_norm": 0.45061975717544556, "learning_rate": 0.00018617696160267113, "loss": 1.5393, "step": 213 }, { "epoch": 0.03146480426392207, "grad_norm": 0.6526356935501099, "learning_rate": 0.000186110183639399, "loss": 1.3094, "step": 214 }, { "epoch": 0.03161183605954788, "grad_norm": 0.6054556369781494, "learning_rate": 0.00018604340567612688, "loss": 1.3197, "step": 215 }, { "epoch": 0.03175886785517368, "grad_norm": 0.43881675601005554, "learning_rate": 0.00018597662771285475, "loss": 1.2096, "step": 216 }, { "epoch": 0.031905899650799485, "grad_norm": 0.7867928743362427, "learning_rate": 0.00018590984974958265, "loss": 1.3241, "step": 217 }, { "epoch": 0.03205293144642529, "grad_norm": 0.5742908120155334, "learning_rate": 0.00018584307178631053, "loss": 1.5102, "step": 218 }, { "epoch": 0.03219996324205109, "grad_norm": 0.63670414686203, "learning_rate": 0.0001857762938230384, "loss": 1.4485, "step": 219 }, { "epoch": 0.0323469950376769, "grad_norm": 0.6961398720741272, "learning_rate": 0.00018570951585976627, "loss": 1.287, "step": 220 }, { "epoch": 0.0324940268333027, "grad_norm": 0.5458360910415649, "learning_rate": 0.00018564273789649415, "loss": 1.079, "step": 221 }, { "epoch": 0.03264105862892851, "grad_norm": 0.7383314371109009, "learning_rate": 0.00018557595993322205, "loss": 1.3491, "step": 222 }, { "epoch": 0.03278809042455431, "grad_norm": 0.7552614212036133, "learning_rate": 0.00018550918196994992, "loss": 1.037, "step": 223 }, { "epoch": 0.032935122220180114, "grad_norm": 0.5258327722549438, "learning_rate": 0.0001854424040066778, "loss": 1.3472, "step": 224 }, { "epoch": 0.033082154015805915, "grad_norm": 0.45732975006103516, "learning_rate": 0.0001853756260434057, "loss": 1.3208, "step": 225 }, { "epoch": 0.03322918581143172, "grad_norm": 0.4905901849269867, "learning_rate": 0.00018530884808013357, "loss": 1.2283, "step": 226 }, { "epoch": 0.03337621760705753, "grad_norm": 0.6717547178268433, "learning_rate": 0.00018524207011686147, "loss": 0.9811, "step": 227 }, { "epoch": 0.03352324940268333, "grad_norm": 0.5436676144599915, "learning_rate": 0.00018517529215358934, "loss": 1.214, "step": 228 }, { "epoch": 0.033670281198309136, "grad_norm": 0.6575579643249512, "learning_rate": 0.00018510851419031721, "loss": 1.0066, "step": 229 }, { "epoch": 0.033817312993934937, "grad_norm": 0.5103508234024048, "learning_rate": 0.0001850417362270451, "loss": 1.3344, "step": 230 }, { "epoch": 0.033964344789560744, "grad_norm": 0.43599745631217957, "learning_rate": 0.00018497495826377296, "loss": 1.4907, "step": 231 }, { "epoch": 0.034111376585186544, "grad_norm": 0.5532944798469543, "learning_rate": 0.00018490818030050083, "loss": 1.4767, "step": 232 }, { "epoch": 0.03425840838081235, "grad_norm": 0.47670668363571167, "learning_rate": 0.00018484140233722873, "loss": 1.5596, "step": 233 }, { "epoch": 0.03440544017643816, "grad_norm": 0.632929265499115, "learning_rate": 0.0001847746243739566, "loss": 1.3332, "step": 234 }, { "epoch": 0.03455247197206396, "grad_norm": 0.6273097991943359, "learning_rate": 0.00018470784641068448, "loss": 1.3206, "step": 235 }, { "epoch": 0.034699503767689766, "grad_norm": 0.5928819179534912, "learning_rate": 0.00018464106844741235, "loss": 1.3895, "step": 236 }, { "epoch": 0.034846535563315566, "grad_norm": 0.47757020592689514, "learning_rate": 0.00018457429048414023, "loss": 1.83, "step": 237 }, { "epoch": 0.03499356735894137, "grad_norm": 0.38154080510139465, "learning_rate": 0.00018450751252086813, "loss": 1.4356, "step": 238 }, { "epoch": 0.03514059915456717, "grad_norm": 0.9369202256202698, "learning_rate": 0.000184440734557596, "loss": 1.3064, "step": 239 }, { "epoch": 0.03528763095019298, "grad_norm": 0.5656161308288574, "learning_rate": 0.00018437395659432387, "loss": 0.9912, "step": 240 }, { "epoch": 0.03543466274581878, "grad_norm": 0.5000745058059692, "learning_rate": 0.00018430717863105175, "loss": 1.2032, "step": 241 }, { "epoch": 0.03558169454144459, "grad_norm": 0.5016139149665833, "learning_rate": 0.00018424040066777965, "loss": 1.2795, "step": 242 }, { "epoch": 0.035728726337070395, "grad_norm": 0.5286661386489868, "learning_rate": 0.00018417362270450752, "loss": 1.233, "step": 243 }, { "epoch": 0.035875758132696195, "grad_norm": 0.448910653591156, "learning_rate": 0.00018410684474123542, "loss": 1.2448, "step": 244 }, { "epoch": 0.036022789928322, "grad_norm": 0.5130086541175842, "learning_rate": 0.0001840400667779633, "loss": 1.3386, "step": 245 }, { "epoch": 0.0361698217239478, "grad_norm": 0.4456808567047119, "learning_rate": 0.00018397328881469117, "loss": 1.502, "step": 246 }, { "epoch": 0.03631685351957361, "grad_norm": 0.47314709424972534, "learning_rate": 0.00018390651085141904, "loss": 1.1558, "step": 247 }, { "epoch": 0.03646388531519941, "grad_norm": 0.6629585027694702, "learning_rate": 0.0001838397328881469, "loss": 1.2716, "step": 248 }, { "epoch": 0.03661091711082522, "grad_norm": 0.49307113885879517, "learning_rate": 0.0001837729549248748, "loss": 1.6779, "step": 249 }, { "epoch": 0.03675794890645102, "grad_norm": 0.5717016458511353, "learning_rate": 0.00018370617696160269, "loss": 1.2835, "step": 250 }, { "epoch": 0.036904980702076824, "grad_norm": 0.6283065676689148, "learning_rate": 0.00018363939899833056, "loss": 1.556, "step": 251 }, { "epoch": 0.03705201249770263, "grad_norm": 0.5064134001731873, "learning_rate": 0.00018357262103505843, "loss": 1.3747, "step": 252 }, { "epoch": 0.03719904429332843, "grad_norm": 0.563591480255127, "learning_rate": 0.0001835058430717863, "loss": 1.5166, "step": 253 }, { "epoch": 0.03734607608895424, "grad_norm": 0.6295974254608154, "learning_rate": 0.0001834390651085142, "loss": 1.6529, "step": 254 }, { "epoch": 0.03749310788458004, "grad_norm": 0.678810715675354, "learning_rate": 0.00018337228714524208, "loss": 1.4293, "step": 255 }, { "epoch": 0.037640139680205846, "grad_norm": 0.5903933048248291, "learning_rate": 0.00018330550918196995, "loss": 1.2373, "step": 256 }, { "epoch": 0.037787171475831646, "grad_norm": 0.5947807431221008, "learning_rate": 0.00018323873121869782, "loss": 1.1279, "step": 257 }, { "epoch": 0.03793420327145745, "grad_norm": 0.3720698058605194, "learning_rate": 0.0001831719532554257, "loss": 1.1771, "step": 258 }, { "epoch": 0.038081235067083254, "grad_norm": 0.6865638494491577, "learning_rate": 0.0001831051752921536, "loss": 1.3519, "step": 259 }, { "epoch": 0.03822826686270906, "grad_norm": 0.40888139605522156, "learning_rate": 0.00018303839732888147, "loss": 1.5246, "step": 260 }, { "epoch": 0.03837529865833487, "grad_norm": 0.4346977174282074, "learning_rate": 0.00018297161936560937, "loss": 1.7581, "step": 261 }, { "epoch": 0.03852233045396067, "grad_norm": 0.7074586749076843, "learning_rate": 0.00018290484140233724, "loss": 1.1929, "step": 262 }, { "epoch": 0.038669362249586475, "grad_norm": 0.5018094182014465, "learning_rate": 0.00018283806343906512, "loss": 1.2372, "step": 263 }, { "epoch": 0.038816394045212275, "grad_norm": 0.683417797088623, "learning_rate": 0.000182771285475793, "loss": 1.2846, "step": 264 }, { "epoch": 0.03896342584083808, "grad_norm": 0.41126397252082825, "learning_rate": 0.0001827045075125209, "loss": 1.3731, "step": 265 }, { "epoch": 0.03911045763646388, "grad_norm": 0.7288692593574524, "learning_rate": 0.00018263772954924876, "loss": 1.338, "step": 266 }, { "epoch": 0.03925748943208969, "grad_norm": 0.45149967074394226, "learning_rate": 0.00018257095158597664, "loss": 1.2097, "step": 267 }, { "epoch": 0.03940452122771549, "grad_norm": 0.5632445216178894, "learning_rate": 0.0001825041736227045, "loss": 1.1596, "step": 268 }, { "epoch": 0.0395515530233413, "grad_norm": 0.76249098777771, "learning_rate": 0.00018243739565943238, "loss": 1.2311, "step": 269 }, { "epoch": 0.039698584818967105, "grad_norm": 0.44647330045700073, "learning_rate": 0.00018237061769616028, "loss": 1.7517, "step": 270 }, { "epoch": 0.039845616614592905, "grad_norm": 0.45566919445991516, "learning_rate": 0.00018230383973288816, "loss": 1.4134, "step": 271 }, { "epoch": 0.03999264841021871, "grad_norm": 0.5739283561706543, "learning_rate": 0.00018223706176961603, "loss": 1.4857, "step": 272 }, { "epoch": 0.04013968020584451, "grad_norm": 0.623755931854248, "learning_rate": 0.0001821702838063439, "loss": 1.1731, "step": 273 }, { "epoch": 0.04028671200147032, "grad_norm": 0.5324702262878418, "learning_rate": 0.00018210350584307178, "loss": 1.1845, "step": 274 }, { "epoch": 0.04043374379709612, "grad_norm": 0.4314653277397156, "learning_rate": 0.00018203672787979968, "loss": 1.2421, "step": 275 }, { "epoch": 0.04058077559272193, "grad_norm": 0.5722083449363708, "learning_rate": 0.00018196994991652755, "loss": 1.8583, "step": 276 }, { "epoch": 0.04072780738834773, "grad_norm": 0.713344931602478, "learning_rate": 0.00018190317195325542, "loss": 1.0944, "step": 277 }, { "epoch": 0.040874839183973534, "grad_norm": 0.49877801537513733, "learning_rate": 0.00018183639398998332, "loss": 1.6714, "step": 278 }, { "epoch": 0.04102187097959934, "grad_norm": 0.6253514885902405, "learning_rate": 0.0001817696160267112, "loss": 1.3567, "step": 279 }, { "epoch": 0.04116890277522514, "grad_norm": 0.6159173250198364, "learning_rate": 0.0001817028380634391, "loss": 1.537, "step": 280 }, { "epoch": 0.04131593457085095, "grad_norm": 0.662022590637207, "learning_rate": 0.00018163606010016697, "loss": 1.3539, "step": 281 }, { "epoch": 0.04146296636647675, "grad_norm": 0.5314798951148987, "learning_rate": 0.00018156928213689484, "loss": 1.4844, "step": 282 }, { "epoch": 0.041609998162102556, "grad_norm": 0.5227733254432678, "learning_rate": 0.00018150250417362272, "loss": 1.327, "step": 283 }, { "epoch": 0.041757029957728356, "grad_norm": 0.6123785972595215, "learning_rate": 0.0001814357262103506, "loss": 1.5786, "step": 284 }, { "epoch": 0.04190406175335416, "grad_norm": 0.46761125326156616, "learning_rate": 0.00018136894824707846, "loss": 1.0189, "step": 285 }, { "epoch": 0.04205109354897997, "grad_norm": 0.5482293963432312, "learning_rate": 0.00018130217028380636, "loss": 1.2721, "step": 286 }, { "epoch": 0.04219812534460577, "grad_norm": 0.7054635882377625, "learning_rate": 0.00018123539232053424, "loss": 1.3526, "step": 287 }, { "epoch": 0.04234515714023158, "grad_norm": 0.6737436056137085, "learning_rate": 0.0001811686143572621, "loss": 1.1784, "step": 288 }, { "epoch": 0.04249218893585738, "grad_norm": 0.6014431715011597, "learning_rate": 0.00018110183639398998, "loss": 1.5513, "step": 289 }, { "epoch": 0.042639220731483185, "grad_norm": 0.5490061640739441, "learning_rate": 0.00018103505843071786, "loss": 1.6406, "step": 290 }, { "epoch": 0.042786252527108985, "grad_norm": 0.7721180319786072, "learning_rate": 0.00018096828046744576, "loss": 1.3798, "step": 291 }, { "epoch": 0.04293328432273479, "grad_norm": 0.5291112065315247, "learning_rate": 0.00018090150250417363, "loss": 1.6346, "step": 292 }, { "epoch": 0.04308031611836059, "grad_norm": 0.7885229587554932, "learning_rate": 0.0001808347245409015, "loss": 1.4546, "step": 293 }, { "epoch": 0.0432273479139864, "grad_norm": 0.4781096875667572, "learning_rate": 0.00018076794657762938, "loss": 1.4728, "step": 294 }, { "epoch": 0.04337437970961221, "grad_norm": 0.7703851461410522, "learning_rate": 0.00018070116861435728, "loss": 1.0396, "step": 295 }, { "epoch": 0.04352141150523801, "grad_norm": 0.8052161931991577, "learning_rate": 0.00018063439065108515, "loss": 1.055, "step": 296 }, { "epoch": 0.043668443300863814, "grad_norm": 0.5105237364768982, "learning_rate": 0.00018056761268781305, "loss": 1.6541, "step": 297 }, { "epoch": 0.043815475096489614, "grad_norm": 0.8041142821311951, "learning_rate": 0.00018050083472454092, "loss": 1.2651, "step": 298 }, { "epoch": 0.04396250689211542, "grad_norm": 0.5732051730155945, "learning_rate": 0.0001804340567612688, "loss": 1.5042, "step": 299 }, { "epoch": 0.04410953868774122, "grad_norm": 0.5450543761253357, "learning_rate": 0.00018036727879799667, "loss": 1.3799, "step": 300 }, { "epoch": 0.04425657048336703, "grad_norm": 0.628911018371582, "learning_rate": 0.00018030050083472454, "loss": 1.386, "step": 301 }, { "epoch": 0.04440360227899283, "grad_norm": 0.6892708539962769, "learning_rate": 0.00018023372287145244, "loss": 1.1957, "step": 302 }, { "epoch": 0.044550634074618636, "grad_norm": 0.5099971294403076, "learning_rate": 0.00018016694490818031, "loss": 0.9891, "step": 303 }, { "epoch": 0.044697665870244443, "grad_norm": 0.5342345237731934, "learning_rate": 0.0001801001669449082, "loss": 1.5303, "step": 304 }, { "epoch": 0.044844697665870244, "grad_norm": 0.5378958582878113, "learning_rate": 0.00018003338898163606, "loss": 1.505, "step": 305 }, { "epoch": 0.04499172946149605, "grad_norm": 0.6278736591339111, "learning_rate": 0.00017996661101836393, "loss": 1.1873, "step": 306 }, { "epoch": 0.04513876125712185, "grad_norm": 0.48578763008117676, "learning_rate": 0.00017989983305509183, "loss": 1.5699, "step": 307 }, { "epoch": 0.04528579305274766, "grad_norm": 0.5329797863960266, "learning_rate": 0.0001798330550918197, "loss": 1.3194, "step": 308 }, { "epoch": 0.04543282484837346, "grad_norm": 0.5952385067939758, "learning_rate": 0.00017976627712854758, "loss": 1.2256, "step": 309 }, { "epoch": 0.045579856643999266, "grad_norm": 1.0115389823913574, "learning_rate": 0.00017969949916527545, "loss": 1.1456, "step": 310 }, { "epoch": 0.045726888439625066, "grad_norm": 0.5174033045768738, "learning_rate": 0.00017963272120200333, "loss": 1.2463, "step": 311 }, { "epoch": 0.04587392023525087, "grad_norm": 0.529289960861206, "learning_rate": 0.00017956594323873123, "loss": 1.7541, "step": 312 }, { "epoch": 0.04602095203087668, "grad_norm": 0.6208413243293762, "learning_rate": 0.0001794991652754591, "loss": 1.1919, "step": 313 }, { "epoch": 0.04616798382650248, "grad_norm": 0.44619953632354736, "learning_rate": 0.000179432387312187, "loss": 1.7906, "step": 314 }, { "epoch": 0.04631501562212829, "grad_norm": 0.5451804995536804, "learning_rate": 0.00017936560934891487, "loss": 1.5572, "step": 315 }, { "epoch": 0.04646204741775409, "grad_norm": 0.6904172897338867, "learning_rate": 0.00017929883138564275, "loss": 1.1451, "step": 316 }, { "epoch": 0.046609079213379895, "grad_norm": 0.5294027924537659, "learning_rate": 0.00017923205342237062, "loss": 1.5372, "step": 317 }, { "epoch": 0.046756111009005695, "grad_norm": 0.5680206418037415, "learning_rate": 0.00017916527545909852, "loss": 1.2343, "step": 318 }, { "epoch": 0.0469031428046315, "grad_norm": 0.4928724467754364, "learning_rate": 0.0001790984974958264, "loss": 1.3485, "step": 319 }, { "epoch": 0.0470501746002573, "grad_norm": 0.52439945936203, "learning_rate": 0.00017903171953255427, "loss": 1.2242, "step": 320 }, { "epoch": 0.04719720639588311, "grad_norm": 0.5181280374526978, "learning_rate": 0.00017896494156928214, "loss": 1.2206, "step": 321 }, { "epoch": 0.04734423819150892, "grad_norm": 0.6266176104545593, "learning_rate": 0.00017889816360601, "loss": 1.2191, "step": 322 }, { "epoch": 0.04749126998713472, "grad_norm": 0.6882799863815308, "learning_rate": 0.0001788313856427379, "loss": 1.2452, "step": 323 }, { "epoch": 0.047638301782760524, "grad_norm": 0.6466065049171448, "learning_rate": 0.0001787646076794658, "loss": 1.1083, "step": 324 }, { "epoch": 0.047785333578386324, "grad_norm": 0.49232763051986694, "learning_rate": 0.00017869782971619366, "loss": 1.4433, "step": 325 }, { "epoch": 0.04793236537401213, "grad_norm": 0.6997049450874329, "learning_rate": 0.00017863105175292153, "loss": 1.1832, "step": 326 }, { "epoch": 0.04807939716963793, "grad_norm": 0.588621973991394, "learning_rate": 0.0001785642737896494, "loss": 1.2729, "step": 327 }, { "epoch": 0.04822642896526374, "grad_norm": 0.5556900501251221, "learning_rate": 0.0001784974958263773, "loss": 1.1033, "step": 328 }, { "epoch": 0.04837346076088954, "grad_norm": 0.5142942070960999, "learning_rate": 0.00017843071786310518, "loss": 1.2296, "step": 329 }, { "epoch": 0.048520492556515346, "grad_norm": 0.4947022795677185, "learning_rate": 0.00017836393989983305, "loss": 1.746, "step": 330 }, { "epoch": 0.04866752435214115, "grad_norm": 0.5721028447151184, "learning_rate": 0.00017829716193656095, "loss": 1.3064, "step": 331 }, { "epoch": 0.04881455614776695, "grad_norm": 0.42991045117378235, "learning_rate": 0.00017823038397328883, "loss": 1.6891, "step": 332 }, { "epoch": 0.04896158794339276, "grad_norm": 0.7306320667266846, "learning_rate": 0.0001781636060100167, "loss": 1.3359, "step": 333 }, { "epoch": 0.04910861973901856, "grad_norm": 0.6763674020767212, "learning_rate": 0.0001780968280467446, "loss": 1.0086, "step": 334 }, { "epoch": 0.04925565153464437, "grad_norm": 0.5117493867874146, "learning_rate": 0.00017803005008347247, "loss": 1.6068, "step": 335 }, { "epoch": 0.04940268333027017, "grad_norm": 0.5649071335792542, "learning_rate": 0.00017796327212020035, "loss": 1.281, "step": 336 }, { "epoch": 0.049549715125895975, "grad_norm": 0.629722535610199, "learning_rate": 0.00017789649415692822, "loss": 1.4425, "step": 337 }, { "epoch": 0.04969674692152178, "grad_norm": 0.4795795977115631, "learning_rate": 0.0001778297161936561, "loss": 1.5157, "step": 338 }, { "epoch": 0.04984377871714758, "grad_norm": 0.5381354093551636, "learning_rate": 0.000177762938230384, "loss": 1.2374, "step": 339 }, { "epoch": 0.04999081051277339, "grad_norm": 0.45366761088371277, "learning_rate": 0.00017769616026711187, "loss": 1.6116, "step": 340 }, { "epoch": 0.05013784230839919, "grad_norm": 0.7714207768440247, "learning_rate": 0.00017762938230383974, "loss": 1.1197, "step": 341 }, { "epoch": 0.050284874104025, "grad_norm": 0.44338080286979675, "learning_rate": 0.0001775626043405676, "loss": 1.5348, "step": 342 }, { "epoch": 0.0504319058996508, "grad_norm": 0.5928587317466736, "learning_rate": 0.00017749582637729548, "loss": 1.3242, "step": 343 }, { "epoch": 0.050578937695276605, "grad_norm": 0.5419535040855408, "learning_rate": 0.00017742904841402339, "loss": 1.438, "step": 344 }, { "epoch": 0.050725969490902405, "grad_norm": 0.5179091095924377, "learning_rate": 0.00017736227045075126, "loss": 1.2534, "step": 345 }, { "epoch": 0.05087300128652821, "grad_norm": 0.5239900350570679, "learning_rate": 0.00017729549248747913, "loss": 1.6379, "step": 346 }, { "epoch": 0.05102003308215402, "grad_norm": 0.46397262811660767, "learning_rate": 0.000177228714524207, "loss": 1.4575, "step": 347 }, { "epoch": 0.05116706487777982, "grad_norm": 0.6184490919113159, "learning_rate": 0.0001771619365609349, "loss": 1.2542, "step": 348 }, { "epoch": 0.051314096673405626, "grad_norm": 0.7168741822242737, "learning_rate": 0.00017709515859766278, "loss": 1.0534, "step": 349 }, { "epoch": 0.05146112846903143, "grad_norm": 0.8870052099227905, "learning_rate": 0.00017702838063439068, "loss": 1.1871, "step": 350 }, { "epoch": 0.051608160264657234, "grad_norm": 0.4634273946285248, "learning_rate": 0.00017696160267111855, "loss": 1.4044, "step": 351 }, { "epoch": 0.051755192060283034, "grad_norm": 0.7017895579338074, "learning_rate": 0.00017689482470784642, "loss": 1.2395, "step": 352 }, { "epoch": 0.05190222385590884, "grad_norm": 0.4703320860862732, "learning_rate": 0.0001768280467445743, "loss": 1.7242, "step": 353 }, { "epoch": 0.05204925565153464, "grad_norm": 0.5330744981765747, "learning_rate": 0.00017676126878130217, "loss": 1.4936, "step": 354 }, { "epoch": 0.05219628744716045, "grad_norm": 0.4972611665725708, "learning_rate": 0.00017669449081803007, "loss": 1.2747, "step": 355 }, { "epoch": 0.052343319242786256, "grad_norm": 0.6031883955001831, "learning_rate": 0.00017662771285475794, "loss": 1.4086, "step": 356 }, { "epoch": 0.052490351038412056, "grad_norm": 0.44157350063323975, "learning_rate": 0.00017656093489148582, "loss": 1.4621, "step": 357 }, { "epoch": 0.05263738283403786, "grad_norm": 0.6725085377693176, "learning_rate": 0.0001764941569282137, "loss": 1.3614, "step": 358 }, { "epoch": 0.05278441462966366, "grad_norm": 0.6004043221473694, "learning_rate": 0.00017642737896494156, "loss": 1.3808, "step": 359 }, { "epoch": 0.05293144642528947, "grad_norm": 0.6323845982551575, "learning_rate": 0.00017636060100166946, "loss": 1.1405, "step": 360 }, { "epoch": 0.05307847822091527, "grad_norm": 0.5598623156547546, "learning_rate": 0.00017629382303839734, "loss": 1.2975, "step": 361 }, { "epoch": 0.05322551001654108, "grad_norm": 0.5515745878219604, "learning_rate": 0.0001762270450751252, "loss": 1.254, "step": 362 }, { "epoch": 0.05337254181216688, "grad_norm": 0.565914511680603, "learning_rate": 0.00017616026711185308, "loss": 1.5574, "step": 363 }, { "epoch": 0.053519573607792685, "grad_norm": 0.587992250919342, "learning_rate": 0.00017609348914858096, "loss": 1.2336, "step": 364 }, { "epoch": 0.05366660540341849, "grad_norm": 0.4784914255142212, "learning_rate": 0.00017602671118530886, "loss": 1.7337, "step": 365 }, { "epoch": 0.05381363719904429, "grad_norm": 0.3817237913608551, "learning_rate": 0.00017595993322203673, "loss": 1.3715, "step": 366 }, { "epoch": 0.0539606689946701, "grad_norm": 0.5429052114486694, "learning_rate": 0.00017589315525876463, "loss": 1.6991, "step": 367 }, { "epoch": 0.0541077007902959, "grad_norm": 0.4912809729576111, "learning_rate": 0.0001758263772954925, "loss": 1.1482, "step": 368 }, { "epoch": 0.05425473258592171, "grad_norm": 0.5083810687065125, "learning_rate": 0.00017575959933222038, "loss": 1.3946, "step": 369 }, { "epoch": 0.05440176438154751, "grad_norm": 0.6512159109115601, "learning_rate": 0.00017569282136894825, "loss": 1.3069, "step": 370 }, { "epoch": 0.054548796177173314, "grad_norm": 0.337384432554245, "learning_rate": 0.00017562604340567615, "loss": 1.6777, "step": 371 }, { "epoch": 0.054695827972799115, "grad_norm": 0.41576051712036133, "learning_rate": 0.00017555926544240402, "loss": 1.6626, "step": 372 }, { "epoch": 0.05484285976842492, "grad_norm": 0.4290235638618469, "learning_rate": 0.0001754924874791319, "loss": 1.5403, "step": 373 }, { "epoch": 0.05498989156405073, "grad_norm": 0.6958747506141663, "learning_rate": 0.00017542570951585977, "loss": 1.0432, "step": 374 }, { "epoch": 0.05513692335967653, "grad_norm": 0.6581677198410034, "learning_rate": 0.00017535893155258764, "loss": 0.8211, "step": 375 }, { "epoch": 0.055283955155302336, "grad_norm": 0.503719687461853, "learning_rate": 0.00017529215358931554, "loss": 1.3448, "step": 376 }, { "epoch": 0.055430986950928136, "grad_norm": 0.5379534959793091, "learning_rate": 0.00017522537562604342, "loss": 1.6441, "step": 377 }, { "epoch": 0.055578018746553944, "grad_norm": 0.5869199633598328, "learning_rate": 0.0001751585976627713, "loss": 1.306, "step": 378 }, { "epoch": 0.055725050542179744, "grad_norm": 0.7087686657905579, "learning_rate": 0.00017509181969949916, "loss": 1.1791, "step": 379 }, { "epoch": 0.05587208233780555, "grad_norm": 0.7464437484741211, "learning_rate": 0.00017502504173622704, "loss": 1.2648, "step": 380 }, { "epoch": 0.05601911413343135, "grad_norm": 0.5606329441070557, "learning_rate": 0.0001749582637729549, "loss": 1.5153, "step": 381 }, { "epoch": 0.05616614592905716, "grad_norm": 0.4861753582954407, "learning_rate": 0.0001748914858096828, "loss": 1.4284, "step": 382 }, { "epoch": 0.056313177724682965, "grad_norm": 0.5995767712593079, "learning_rate": 0.0001748247078464107, "loss": 1.3751, "step": 383 }, { "epoch": 0.056460209520308766, "grad_norm": 0.4739896059036255, "learning_rate": 0.00017475792988313858, "loss": 1.7543, "step": 384 }, { "epoch": 0.05660724131593457, "grad_norm": 0.6110029816627502, "learning_rate": 0.00017469115191986646, "loss": 1.3401, "step": 385 }, { "epoch": 0.05675427311156037, "grad_norm": 0.6064412593841553, "learning_rate": 0.00017462437395659433, "loss": 1.2851, "step": 386 }, { "epoch": 0.05690130490718618, "grad_norm": 0.6929087042808533, "learning_rate": 0.00017455759599332223, "loss": 1.3166, "step": 387 }, { "epoch": 0.05704833670281198, "grad_norm": 0.7395351529121399, "learning_rate": 0.0001744908180300501, "loss": 1.7672, "step": 388 }, { "epoch": 0.05719536849843779, "grad_norm": 0.5807111263275146, "learning_rate": 0.00017442404006677798, "loss": 1.0606, "step": 389 }, { "epoch": 0.057342400294063595, "grad_norm": 0.610605001449585, "learning_rate": 0.00017435726210350585, "loss": 1.0741, "step": 390 }, { "epoch": 0.057489432089689395, "grad_norm": 0.5019125938415527, "learning_rate": 0.00017429048414023372, "loss": 1.3969, "step": 391 }, { "epoch": 0.0576364638853152, "grad_norm": 0.6552882790565491, "learning_rate": 0.00017422370617696162, "loss": 1.1489, "step": 392 }, { "epoch": 0.057783495680941, "grad_norm": 0.6050765514373779, "learning_rate": 0.0001741569282136895, "loss": 1.2225, "step": 393 }, { "epoch": 0.05793052747656681, "grad_norm": 0.6012828946113586, "learning_rate": 0.00017409015025041737, "loss": 1.56, "step": 394 }, { "epoch": 0.05807755927219261, "grad_norm": 0.37168624997138977, "learning_rate": 0.00017402337228714524, "loss": 1.5677, "step": 395 }, { "epoch": 0.05822459106781842, "grad_norm": 0.5893239378929138, "learning_rate": 0.00017395659432387311, "loss": 1.2203, "step": 396 }, { "epoch": 0.05837162286344422, "grad_norm": 0.3859291970729828, "learning_rate": 0.00017388981636060101, "loss": 1.3483, "step": 397 }, { "epoch": 0.058518654659070024, "grad_norm": 0.5526376366615295, "learning_rate": 0.0001738230383973289, "loss": 1.3134, "step": 398 }, { "epoch": 0.05866568645469583, "grad_norm": 0.6875793933868408, "learning_rate": 0.00017375626043405676, "loss": 1.4348, "step": 399 }, { "epoch": 0.05881271825032163, "grad_norm": 0.45504531264305115, "learning_rate": 0.00017368948247078466, "loss": 1.2822, "step": 400 }, { "epoch": 0.05895975004594744, "grad_norm": 0.4531843662261963, "learning_rate": 0.00017362270450751253, "loss": 1.3129, "step": 401 }, { "epoch": 0.05910678184157324, "grad_norm": 0.5788389444351196, "learning_rate": 0.0001735559265442404, "loss": 1.1942, "step": 402 }, { "epoch": 0.059253813637199046, "grad_norm": 0.7107940912246704, "learning_rate": 0.0001734891485809683, "loss": 1.0602, "step": 403 }, { "epoch": 0.059400845432824846, "grad_norm": 0.49619370698928833, "learning_rate": 0.00017342237061769618, "loss": 1.7356, "step": 404 }, { "epoch": 0.05954787722845065, "grad_norm": 0.5672197341918945, "learning_rate": 0.00017335559265442405, "loss": 1.5072, "step": 405 }, { "epoch": 0.059694909024076453, "grad_norm": 0.37244269251823425, "learning_rate": 0.00017328881469115193, "loss": 1.5897, "step": 406 }, { "epoch": 0.05984194081970226, "grad_norm": 0.7834572196006775, "learning_rate": 0.0001732220367278798, "loss": 1.341, "step": 407 }, { "epoch": 0.05998897261532807, "grad_norm": 0.5827113389968872, "learning_rate": 0.0001731552587646077, "loss": 1.4502, "step": 408 }, { "epoch": 0.06013600441095387, "grad_norm": 0.5970304608345032, "learning_rate": 0.00017308848080133557, "loss": 1.277, "step": 409 }, { "epoch": 0.060283036206579675, "grad_norm": 0.5760021209716797, "learning_rate": 0.00017302170283806345, "loss": 1.278, "step": 410 }, { "epoch": 0.060430068002205475, "grad_norm": 0.5642597675323486, "learning_rate": 0.00017295492487479132, "loss": 1.5631, "step": 411 }, { "epoch": 0.06057709979783128, "grad_norm": 0.5759330987930298, "learning_rate": 0.0001728881469115192, "loss": 1.2142, "step": 412 }, { "epoch": 0.06072413159345708, "grad_norm": 0.7772310376167297, "learning_rate": 0.0001728213689482471, "loss": 1.0597, "step": 413 }, { "epoch": 0.06087116338908289, "grad_norm": 0.5275145173072815, "learning_rate": 0.00017275459098497497, "loss": 1.2722, "step": 414 }, { "epoch": 0.06101819518470869, "grad_norm": 0.6060823798179626, "learning_rate": 0.00017268781302170284, "loss": 1.2057, "step": 415 }, { "epoch": 0.0611652269803345, "grad_norm": 0.5434503555297852, "learning_rate": 0.0001726210350584307, "loss": 0.9913, "step": 416 }, { "epoch": 0.061312258775960304, "grad_norm": 0.5890235304832458, "learning_rate": 0.0001725542570951586, "loss": 1.4898, "step": 417 }, { "epoch": 0.061459290571586105, "grad_norm": 0.42771458625793457, "learning_rate": 0.0001724874791318865, "loss": 1.3068, "step": 418 }, { "epoch": 0.06160632236721191, "grad_norm": 0.5566801428794861, "learning_rate": 0.0001724207011686144, "loss": 1.2072, "step": 419 }, { "epoch": 0.06175335416283771, "grad_norm": 0.7288262844085693, "learning_rate": 0.00017235392320534226, "loss": 1.3318, "step": 420 }, { "epoch": 0.06190038595846352, "grad_norm": 0.6397382020950317, "learning_rate": 0.00017228714524207013, "loss": 1.2147, "step": 421 }, { "epoch": 0.06204741775408932, "grad_norm": 0.6314074993133545, "learning_rate": 0.000172220367278798, "loss": 1.0967, "step": 422 }, { "epoch": 0.062194449549715126, "grad_norm": 0.6998132467269897, "learning_rate": 0.00017215358931552588, "loss": 0.9049, "step": 423 }, { "epoch": 0.06234148134534093, "grad_norm": 0.6025044918060303, "learning_rate": 0.00017208681135225378, "loss": 1.2031, "step": 424 }, { "epoch": 0.062488513140966734, "grad_norm": 0.40608635544776917, "learning_rate": 0.00017202003338898165, "loss": 1.1597, "step": 425 }, { "epoch": 0.06263554493659254, "grad_norm": 0.4057765305042267, "learning_rate": 0.00017195325542570953, "loss": 1.8077, "step": 426 }, { "epoch": 0.06278257673221835, "grad_norm": 0.4467298984527588, "learning_rate": 0.0001718864774624374, "loss": 1.5396, "step": 427 }, { "epoch": 0.06292960852784414, "grad_norm": 0.4709654152393341, "learning_rate": 0.00017181969949916527, "loss": 1.0727, "step": 428 }, { "epoch": 0.06307664032346995, "grad_norm": 0.7631885409355164, "learning_rate": 0.00017175292153589317, "loss": 1.2302, "step": 429 }, { "epoch": 0.06322367211909576, "grad_norm": 0.7356657981872559, "learning_rate": 0.00017168614357262105, "loss": 1.2045, "step": 430 }, { "epoch": 0.06337070391472156, "grad_norm": 0.5696567296981812, "learning_rate": 0.00017161936560934892, "loss": 1.4382, "step": 431 }, { "epoch": 0.06351773571034736, "grad_norm": 0.9167963862419128, "learning_rate": 0.0001715525876460768, "loss": 1.145, "step": 432 }, { "epoch": 0.06366476750597316, "grad_norm": 0.5019359588623047, "learning_rate": 0.00017148580968280467, "loss": 1.2816, "step": 433 }, { "epoch": 0.06381179930159897, "grad_norm": 0.7275418639183044, "learning_rate": 0.00017141903171953257, "loss": 1.0573, "step": 434 }, { "epoch": 0.06395883109722478, "grad_norm": 0.5103479027748108, "learning_rate": 0.00017135225375626044, "loss": 1.8111, "step": 435 }, { "epoch": 0.06410586289285058, "grad_norm": 0.30059221386909485, "learning_rate": 0.00017128547579298834, "loss": 1.0134, "step": 436 }, { "epoch": 0.06425289468847638, "grad_norm": 0.4508712589740753, "learning_rate": 0.0001712186978297162, "loss": 1.5301, "step": 437 }, { "epoch": 0.06439992648410219, "grad_norm": 0.5325943827629089, "learning_rate": 0.00017115191986644409, "loss": 1.3226, "step": 438 }, { "epoch": 0.06454695827972799, "grad_norm": 0.7640469074249268, "learning_rate": 0.00017108514190317196, "loss": 1.1013, "step": 439 }, { "epoch": 0.0646939900753538, "grad_norm": 0.4502774477005005, "learning_rate": 0.00017101836393989986, "loss": 1.752, "step": 440 }, { "epoch": 0.06484102187097959, "grad_norm": 0.34934666752815247, "learning_rate": 0.00017095158597662773, "loss": 1.4078, "step": 441 }, { "epoch": 0.0649880536666054, "grad_norm": 0.41137194633483887, "learning_rate": 0.0001708848080133556, "loss": 1.5522, "step": 442 }, { "epoch": 0.06513508546223121, "grad_norm": 0.5682129263877869, "learning_rate": 0.00017081803005008348, "loss": 1.0836, "step": 443 }, { "epoch": 0.06528211725785701, "grad_norm": 0.5651811361312866, "learning_rate": 0.00017075125208681135, "loss": 1.4422, "step": 444 }, { "epoch": 0.06542914905348282, "grad_norm": 0.5942316055297852, "learning_rate": 0.00017068447412353925, "loss": 0.9892, "step": 445 }, { "epoch": 0.06557618084910861, "grad_norm": 0.5490965843200684, "learning_rate": 0.00017061769616026712, "loss": 1.1708, "step": 446 }, { "epoch": 0.06572321264473442, "grad_norm": 0.4732477068901062, "learning_rate": 0.000170550918196995, "loss": 1.8613, "step": 447 }, { "epoch": 0.06587024444036023, "grad_norm": 0.5853902697563171, "learning_rate": 0.00017048414023372287, "loss": 1.8948, "step": 448 }, { "epoch": 0.06601727623598604, "grad_norm": 0.6809552907943726, "learning_rate": 0.00017041736227045074, "loss": 1.2675, "step": 449 }, { "epoch": 0.06616430803161183, "grad_norm": 0.43026697635650635, "learning_rate": 0.00017035058430717862, "loss": 1.3654, "step": 450 }, { "epoch": 0.06631133982723764, "grad_norm": 0.5828919410705566, "learning_rate": 0.00017028380634390652, "loss": 1.1871, "step": 451 }, { "epoch": 0.06645837162286344, "grad_norm": 0.6122982501983643, "learning_rate": 0.0001702170283806344, "loss": 1.1506, "step": 452 }, { "epoch": 0.06660540341848925, "grad_norm": 0.3833220899105072, "learning_rate": 0.0001701502504173623, "loss": 2.013, "step": 453 }, { "epoch": 0.06675243521411506, "grad_norm": 0.5167496800422668, "learning_rate": 0.00017008347245409016, "loss": 1.2186, "step": 454 }, { "epoch": 0.06689946700974085, "grad_norm": 0.5950973033905029, "learning_rate": 0.00017001669449081804, "loss": 1.1173, "step": 455 }, { "epoch": 0.06704649880536666, "grad_norm": 0.6563694477081299, "learning_rate": 0.00016994991652754594, "loss": 1.3389, "step": 456 }, { "epoch": 0.06719353060099247, "grad_norm": 0.47440657019615173, "learning_rate": 0.0001698831385642738, "loss": 1.3505, "step": 457 }, { "epoch": 0.06734056239661827, "grad_norm": 0.7902119755744934, "learning_rate": 0.00016981636060100168, "loss": 1.2941, "step": 458 }, { "epoch": 0.06748759419224407, "grad_norm": 0.7536194920539856, "learning_rate": 0.00016974958263772956, "loss": 1.044, "step": 459 }, { "epoch": 0.06763462598786987, "grad_norm": 0.41606923937797546, "learning_rate": 0.00016968280467445743, "loss": 1.4629, "step": 460 }, { "epoch": 0.06778165778349568, "grad_norm": 0.7575483322143555, "learning_rate": 0.00016961602671118533, "loss": 1.2351, "step": 461 }, { "epoch": 0.06792868957912149, "grad_norm": 0.5046858787536621, "learning_rate": 0.0001695492487479132, "loss": 1.5136, "step": 462 }, { "epoch": 0.0680757213747473, "grad_norm": 0.3712179362773895, "learning_rate": 0.00016948247078464108, "loss": 1.5869, "step": 463 }, { "epoch": 0.06822275317037309, "grad_norm": 0.5720416903495789, "learning_rate": 0.00016941569282136895, "loss": 1.3351, "step": 464 }, { "epoch": 0.0683697849659989, "grad_norm": 0.5300132036209106, "learning_rate": 0.00016934891485809682, "loss": 1.0023, "step": 465 }, { "epoch": 0.0685168167616247, "grad_norm": 0.8313441276550293, "learning_rate": 0.0001692821368948247, "loss": 1.3816, "step": 466 }, { "epoch": 0.06866384855725051, "grad_norm": 0.6177215576171875, "learning_rate": 0.0001692153589315526, "loss": 1.1799, "step": 467 }, { "epoch": 0.06881088035287632, "grad_norm": 0.6721746921539307, "learning_rate": 0.00016914858096828047, "loss": 0.9188, "step": 468 }, { "epoch": 0.06895791214850211, "grad_norm": 0.5051236152648926, "learning_rate": 0.00016908180300500834, "loss": 1.474, "step": 469 }, { "epoch": 0.06910494394412792, "grad_norm": 0.6177197098731995, "learning_rate": 0.00016901502504173624, "loss": 1.2215, "step": 470 }, { "epoch": 0.06925197573975372, "grad_norm": 0.6086992025375366, "learning_rate": 0.00016894824707846412, "loss": 1.3015, "step": 471 }, { "epoch": 0.06939900753537953, "grad_norm": 0.3402976393699646, "learning_rate": 0.00016888146911519202, "loss": 1.3786, "step": 472 }, { "epoch": 0.06954603933100532, "grad_norm": 0.5347069501876831, "learning_rate": 0.0001688146911519199, "loss": 1.1683, "step": 473 }, { "epoch": 0.06969307112663113, "grad_norm": 0.5726178884506226, "learning_rate": 0.00016874791318864776, "loss": 0.9839, "step": 474 }, { "epoch": 0.06984010292225694, "grad_norm": 0.5598688125610352, "learning_rate": 0.00016868113522537564, "loss": 1.019, "step": 475 }, { "epoch": 0.06998713471788275, "grad_norm": 0.45043468475341797, "learning_rate": 0.0001686143572621035, "loss": 1.7159, "step": 476 }, { "epoch": 0.07013416651350855, "grad_norm": 0.5194405913352966, "learning_rate": 0.0001685475792988314, "loss": 1.0443, "step": 477 }, { "epoch": 0.07028119830913435, "grad_norm": 0.521621584892273, "learning_rate": 0.00016848080133555928, "loss": 1.0726, "step": 478 }, { "epoch": 0.07042823010476015, "grad_norm": 0.7171264290809631, "learning_rate": 0.00016841402337228716, "loss": 1.1849, "step": 479 }, { "epoch": 0.07057526190038596, "grad_norm": 0.6459790468215942, "learning_rate": 0.00016834724540901503, "loss": 1.0724, "step": 480 }, { "epoch": 0.07072229369601177, "grad_norm": 0.646496057510376, "learning_rate": 0.0001682804674457429, "loss": 1.2477, "step": 481 }, { "epoch": 0.07086932549163756, "grad_norm": 0.49232256412506104, "learning_rate": 0.00016821368948247077, "loss": 1.2501, "step": 482 }, { "epoch": 0.07101635728726337, "grad_norm": 0.5565536022186279, "learning_rate": 0.00016814691151919868, "loss": 1.4978, "step": 483 }, { "epoch": 0.07116338908288918, "grad_norm": 0.5520440340042114, "learning_rate": 0.00016808013355592655, "loss": 1.3737, "step": 484 }, { "epoch": 0.07131042087851498, "grad_norm": 0.6825416088104248, "learning_rate": 0.00016801335559265442, "loss": 1.2712, "step": 485 }, { "epoch": 0.07145745267414079, "grad_norm": 0.494396448135376, "learning_rate": 0.0001679465776293823, "loss": 1.3677, "step": 486 }, { "epoch": 0.07160448446976658, "grad_norm": 0.4603559672832489, "learning_rate": 0.0001678797996661102, "loss": 1.3218, "step": 487 }, { "epoch": 0.07175151626539239, "grad_norm": 0.4678351879119873, "learning_rate": 0.00016781302170283807, "loss": 1.15, "step": 488 }, { "epoch": 0.0718985480610182, "grad_norm": 0.569291353225708, "learning_rate": 0.00016774624373956597, "loss": 1.273, "step": 489 }, { "epoch": 0.072045579856644, "grad_norm": 0.48903244733810425, "learning_rate": 0.00016767946577629384, "loss": 1.6605, "step": 490 }, { "epoch": 0.0721926116522698, "grad_norm": 0.6263558864593506, "learning_rate": 0.00016761268781302171, "loss": 1.0484, "step": 491 }, { "epoch": 0.0723396434478956, "grad_norm": 0.49533993005752563, "learning_rate": 0.0001675459098497496, "loss": 1.5768, "step": 492 }, { "epoch": 0.07248667524352141, "grad_norm": 0.6094350814819336, "learning_rate": 0.0001674791318864775, "loss": 1.0419, "step": 493 }, { "epoch": 0.07263370703914722, "grad_norm": 0.48206254839897156, "learning_rate": 0.00016741235392320536, "loss": 1.3751, "step": 494 }, { "epoch": 0.07278073883477303, "grad_norm": 0.4551871716976166, "learning_rate": 0.00016734557595993323, "loss": 1.2139, "step": 495 }, { "epoch": 0.07292777063039882, "grad_norm": 0.7227940559387207, "learning_rate": 0.0001672787979966611, "loss": 1.0708, "step": 496 }, { "epoch": 0.07307480242602463, "grad_norm": 0.45795151591300964, "learning_rate": 0.00016721202003338898, "loss": 1.8188, "step": 497 }, { "epoch": 0.07322183422165043, "grad_norm": 0.6164366602897644, "learning_rate": 0.00016714524207011685, "loss": 1.0991, "step": 498 }, { "epoch": 0.07336886601727624, "grad_norm": 0.5615074038505554, "learning_rate": 0.00016707846410684475, "loss": 1.1716, "step": 499 }, { "epoch": 0.07351589781290203, "grad_norm": 0.607083797454834, "learning_rate": 0.00016701168614357263, "loss": 1.2301, "step": 500 }, { "epoch": 0.07366292960852784, "grad_norm": 0.635743260383606, "learning_rate": 0.0001669449081803005, "loss": 1.1473, "step": 501 }, { "epoch": 0.07380996140415365, "grad_norm": 0.691044270992279, "learning_rate": 0.00016687813021702837, "loss": 1.3121, "step": 502 }, { "epoch": 0.07395699319977946, "grad_norm": 0.6877620220184326, "learning_rate": 0.00016681135225375625, "loss": 1.1813, "step": 503 }, { "epoch": 0.07410402499540526, "grad_norm": 0.4766697287559509, "learning_rate": 0.00016674457429048415, "loss": 1.314, "step": 504 }, { "epoch": 0.07425105679103106, "grad_norm": 0.5810314416885376, "learning_rate": 0.00016667779632721202, "loss": 1.523, "step": 505 }, { "epoch": 0.07439808858665686, "grad_norm": 0.6551008224487305, "learning_rate": 0.00016661101836393992, "loss": 1.4418, "step": 506 }, { "epoch": 0.07454512038228267, "grad_norm": 0.5772584080696106, "learning_rate": 0.0001665442404006678, "loss": 1.3349, "step": 507 }, { "epoch": 0.07469215217790848, "grad_norm": 0.630496621131897, "learning_rate": 0.00016647746243739567, "loss": 1.1878, "step": 508 }, { "epoch": 0.07483918397353427, "grad_norm": 0.6510344743728638, "learning_rate": 0.00016641068447412357, "loss": 1.0391, "step": 509 }, { "epoch": 0.07498621576916008, "grad_norm": 0.5054215788841248, "learning_rate": 0.00016634390651085144, "loss": 1.1182, "step": 510 }, { "epoch": 0.07513324756478588, "grad_norm": 0.5160056352615356, "learning_rate": 0.0001662771285475793, "loss": 1.2966, "step": 511 }, { "epoch": 0.07528027936041169, "grad_norm": 0.4429018497467041, "learning_rate": 0.00016621035058430719, "loss": 0.9794, "step": 512 }, { "epoch": 0.0754273111560375, "grad_norm": 0.5845345854759216, "learning_rate": 0.00016614357262103506, "loss": 1.1038, "step": 513 }, { "epoch": 0.07557434295166329, "grad_norm": 0.5730257630348206, "learning_rate": 0.00016607679465776293, "loss": 1.6736, "step": 514 }, { "epoch": 0.0757213747472891, "grad_norm": 0.5783519744873047, "learning_rate": 0.00016601001669449083, "loss": 1.1062, "step": 515 }, { "epoch": 0.0758684065429149, "grad_norm": 0.5606603622436523, "learning_rate": 0.0001659432387312187, "loss": 0.9852, "step": 516 }, { "epoch": 0.07601543833854071, "grad_norm": 0.4204868972301483, "learning_rate": 0.00016587646076794658, "loss": 1.5955, "step": 517 }, { "epoch": 0.07616247013416651, "grad_norm": 0.46632620692253113, "learning_rate": 0.00016580968280467445, "loss": 1.3355, "step": 518 }, { "epoch": 0.07630950192979231, "grad_norm": 0.4620303213596344, "learning_rate": 0.00016574290484140233, "loss": 1.5078, "step": 519 }, { "epoch": 0.07645653372541812, "grad_norm": 0.6300202012062073, "learning_rate": 0.00016567612687813023, "loss": 1.2977, "step": 520 }, { "epoch": 0.07660356552104393, "grad_norm": 0.6746147871017456, "learning_rate": 0.0001656093489148581, "loss": 1.0621, "step": 521 }, { "epoch": 0.07675059731666974, "grad_norm": 0.6547514796257019, "learning_rate": 0.00016554257095158597, "loss": 1.3631, "step": 522 }, { "epoch": 0.07689762911229553, "grad_norm": 0.6017608642578125, "learning_rate": 0.00016547579298831387, "loss": 1.0852, "step": 523 }, { "epoch": 0.07704466090792134, "grad_norm": 0.5172386765480042, "learning_rate": 0.00016540901502504175, "loss": 1.1922, "step": 524 }, { "epoch": 0.07719169270354714, "grad_norm": 0.6799414157867432, "learning_rate": 0.00016534223706176965, "loss": 1.2676, "step": 525 }, { "epoch": 0.07733872449917295, "grad_norm": 0.5005510449409485, "learning_rate": 0.00016527545909849752, "loss": 1.3181, "step": 526 }, { "epoch": 0.07748575629479874, "grad_norm": 0.45933207869529724, "learning_rate": 0.0001652086811352254, "loss": 1.6881, "step": 527 }, { "epoch": 0.07763278809042455, "grad_norm": 0.5110999941825867, "learning_rate": 0.00016514190317195327, "loss": 1.3687, "step": 528 }, { "epoch": 0.07777981988605036, "grad_norm": 0.6447016596794128, "learning_rate": 0.00016507512520868114, "loss": 1.2424, "step": 529 }, { "epoch": 0.07792685168167617, "grad_norm": 0.5730313062667847, "learning_rate": 0.00016500834724540904, "loss": 1.3088, "step": 530 }, { "epoch": 0.07807388347730197, "grad_norm": 0.6265355944633484, "learning_rate": 0.0001649415692821369, "loss": 0.965, "step": 531 }, { "epoch": 0.07822091527292777, "grad_norm": 0.5655127167701721, "learning_rate": 0.00016487479131886478, "loss": 1.32, "step": 532 }, { "epoch": 0.07836794706855357, "grad_norm": 0.6001642346382141, "learning_rate": 0.00016480801335559266, "loss": 1.1968, "step": 533 }, { "epoch": 0.07851497886417938, "grad_norm": 0.5693176984786987, "learning_rate": 0.00016474123539232053, "loss": 0.967, "step": 534 }, { "epoch": 0.07866201065980519, "grad_norm": 0.5087970495223999, "learning_rate": 0.0001646744574290484, "loss": 1.136, "step": 535 }, { "epoch": 0.07880904245543098, "grad_norm": 0.571503221988678, "learning_rate": 0.0001646076794657763, "loss": 1.3875, "step": 536 }, { "epoch": 0.07895607425105679, "grad_norm": 0.4929220974445343, "learning_rate": 0.00016454090150250418, "loss": 1.3136, "step": 537 }, { "epoch": 0.0791031060466826, "grad_norm": 0.5236201286315918, "learning_rate": 0.00016447412353923205, "loss": 1.267, "step": 538 }, { "epoch": 0.0792501378423084, "grad_norm": 0.6344238519668579, "learning_rate": 0.00016440734557595992, "loss": 1.4561, "step": 539 }, { "epoch": 0.07939716963793421, "grad_norm": 0.4705124497413635, "learning_rate": 0.00016434056761268782, "loss": 1.6744, "step": 540 }, { "epoch": 0.07954420143356, "grad_norm": 0.5737633109092712, "learning_rate": 0.0001642737896494157, "loss": 0.8752, "step": 541 }, { "epoch": 0.07969123322918581, "grad_norm": 0.5391960144042969, "learning_rate": 0.0001642070116861436, "loss": 1.2269, "step": 542 }, { "epoch": 0.07983826502481162, "grad_norm": 0.4671536982059479, "learning_rate": 0.00016414023372287147, "loss": 1.7907, "step": 543 }, { "epoch": 0.07998529682043742, "grad_norm": 0.4537203013896942, "learning_rate": 0.00016407345575959934, "loss": 1.486, "step": 544 }, { "epoch": 0.08013232861606322, "grad_norm": 0.5616577863693237, "learning_rate": 0.00016400667779632722, "loss": 1.3344, "step": 545 }, { "epoch": 0.08027936041168902, "grad_norm": 0.4991005063056946, "learning_rate": 0.00016393989983305512, "loss": 1.2648, "step": 546 }, { "epoch": 0.08042639220731483, "grad_norm": 0.5824306011199951, "learning_rate": 0.000163873121869783, "loss": 1.1378, "step": 547 }, { "epoch": 0.08057342400294064, "grad_norm": 0.7721166610717773, "learning_rate": 0.00016380634390651086, "loss": 1.0335, "step": 548 }, { "epoch": 0.08072045579856645, "grad_norm": 0.5608398914337158, "learning_rate": 0.00016373956594323874, "loss": 1.6231, "step": 549 }, { "epoch": 0.08086748759419224, "grad_norm": 0.5794057250022888, "learning_rate": 0.0001636727879799666, "loss": 1.1632, "step": 550 }, { "epoch": 0.08101451938981805, "grad_norm": 0.5831037163734436, "learning_rate": 0.00016360601001669448, "loss": 0.891, "step": 551 }, { "epoch": 0.08116155118544385, "grad_norm": 0.5077447295188904, "learning_rate": 0.00016353923205342238, "loss": 1.416, "step": 552 }, { "epoch": 0.08130858298106966, "grad_norm": 0.6380168199539185, "learning_rate": 0.00016347245409015026, "loss": 1.2712, "step": 553 }, { "epoch": 0.08145561477669545, "grad_norm": 0.4334633946418762, "learning_rate": 0.00016340567612687813, "loss": 2.1186, "step": 554 }, { "epoch": 0.08160264657232126, "grad_norm": 0.5638923048973083, "learning_rate": 0.000163338898163606, "loss": 1.4215, "step": 555 }, { "epoch": 0.08174967836794707, "grad_norm": 0.5628969669342041, "learning_rate": 0.00016327212020033388, "loss": 1.3142, "step": 556 }, { "epoch": 0.08189671016357288, "grad_norm": 0.5809241533279419, "learning_rate": 0.00016320534223706178, "loss": 1.2418, "step": 557 }, { "epoch": 0.08204374195919868, "grad_norm": 0.5429731011390686, "learning_rate": 0.00016313856427378965, "loss": 1.4285, "step": 558 }, { "epoch": 0.08219077375482448, "grad_norm": 0.5208382606506348, "learning_rate": 0.00016307178631051755, "loss": 1.7562, "step": 559 }, { "epoch": 0.08233780555045028, "grad_norm": 0.5188065767288208, "learning_rate": 0.00016300500834724542, "loss": 1.7345, "step": 560 }, { "epoch": 0.08248483734607609, "grad_norm": 0.4500925540924072, "learning_rate": 0.0001629382303839733, "loss": 0.9377, "step": 561 }, { "epoch": 0.0826318691417019, "grad_norm": 0.5992081165313721, "learning_rate": 0.0001628714524207012, "loss": 1.3034, "step": 562 }, { "epoch": 0.0827789009373277, "grad_norm": 0.3867233395576477, "learning_rate": 0.00016280467445742907, "loss": 1.3737, "step": 563 }, { "epoch": 0.0829259327329535, "grad_norm": 0.546248197555542, "learning_rate": 0.00016273789649415694, "loss": 1.3586, "step": 564 }, { "epoch": 0.0830729645285793, "grad_norm": 0.6845179200172424, "learning_rate": 0.00016267111853088482, "loss": 1.4468, "step": 565 }, { "epoch": 0.08321999632420511, "grad_norm": 0.5188910961151123, "learning_rate": 0.0001626043405676127, "loss": 1.1385, "step": 566 }, { "epoch": 0.08336702811983092, "grad_norm": 0.5526028871536255, "learning_rate": 0.00016253756260434056, "loss": 1.2791, "step": 567 }, { "epoch": 0.08351405991545671, "grad_norm": 0.6084735989570618, "learning_rate": 0.00016247078464106846, "loss": 1.3271, "step": 568 }, { "epoch": 0.08366109171108252, "grad_norm": 0.48478785157203674, "learning_rate": 0.00016240400667779634, "loss": 1.3556, "step": 569 }, { "epoch": 0.08380812350670833, "grad_norm": 0.5505275726318359, "learning_rate": 0.0001623372287145242, "loss": 1.327, "step": 570 }, { "epoch": 0.08395515530233413, "grad_norm": 0.5384365916252136, "learning_rate": 0.00016227045075125208, "loss": 1.1407, "step": 571 }, { "epoch": 0.08410218709795994, "grad_norm": 0.6654772758483887, "learning_rate": 0.00016220367278797996, "loss": 0.9606, "step": 572 }, { "epoch": 0.08424921889358573, "grad_norm": 0.7263393998146057, "learning_rate": 0.00016213689482470786, "loss": 1.5704, "step": 573 }, { "epoch": 0.08439625068921154, "grad_norm": 0.5659138560295105, "learning_rate": 0.00016207011686143573, "loss": 1.5329, "step": 574 }, { "epoch": 0.08454328248483735, "grad_norm": 0.5163227915763855, "learning_rate": 0.0001620033388981636, "loss": 1.3066, "step": 575 }, { "epoch": 0.08469031428046316, "grad_norm": 0.6250882744789124, "learning_rate": 0.0001619365609348915, "loss": 1.1633, "step": 576 }, { "epoch": 0.08483734607608895, "grad_norm": 0.8098819255828857, "learning_rate": 0.00016186978297161938, "loss": 1.1585, "step": 577 }, { "epoch": 0.08498437787171476, "grad_norm": 0.5937548279762268, "learning_rate": 0.00016180300500834728, "loss": 1.1851, "step": 578 }, { "epoch": 0.08513140966734056, "grad_norm": 0.5967146158218384, "learning_rate": 0.00016173622704507515, "loss": 1.5233, "step": 579 }, { "epoch": 0.08527844146296637, "grad_norm": 0.5244185924530029, "learning_rate": 0.00016166944908180302, "loss": 1.5209, "step": 580 }, { "epoch": 0.08542547325859218, "grad_norm": 0.859516978263855, "learning_rate": 0.0001616026711185309, "loss": 1.1576, "step": 581 }, { "epoch": 0.08557250505421797, "grad_norm": 0.5957148671150208, "learning_rate": 0.00016153589315525877, "loss": 1.3192, "step": 582 }, { "epoch": 0.08571953684984378, "grad_norm": 0.702893078327179, "learning_rate": 0.00016146911519198664, "loss": 1.1522, "step": 583 }, { "epoch": 0.08586656864546958, "grad_norm": 0.7043382525444031, "learning_rate": 0.00016140233722871454, "loss": 1.1365, "step": 584 }, { "epoch": 0.08601360044109539, "grad_norm": 0.49930375814437866, "learning_rate": 0.00016133555926544241, "loss": 1.7699, "step": 585 }, { "epoch": 0.08616063223672119, "grad_norm": 0.665249228477478, "learning_rate": 0.0001612687813021703, "loss": 1.1508, "step": 586 }, { "epoch": 0.08630766403234699, "grad_norm": 0.7059361934661865, "learning_rate": 0.00016120200333889816, "loss": 1.4488, "step": 587 }, { "epoch": 0.0864546958279728, "grad_norm": 0.582984983921051, "learning_rate": 0.00016113522537562603, "loss": 1.4704, "step": 588 }, { "epoch": 0.0866017276235986, "grad_norm": 0.6167340278625488, "learning_rate": 0.00016106844741235393, "loss": 1.4451, "step": 589 }, { "epoch": 0.08674875941922441, "grad_norm": 0.8113065958023071, "learning_rate": 0.0001610016694490818, "loss": 0.986, "step": 590 }, { "epoch": 0.08689579121485021, "grad_norm": 0.48449090123176575, "learning_rate": 0.00016093489148580968, "loss": 0.894, "step": 591 }, { "epoch": 0.08704282301047601, "grad_norm": 0.7519744634628296, "learning_rate": 0.00016086811352253755, "loss": 1.1783, "step": 592 }, { "epoch": 0.08718985480610182, "grad_norm": 0.6161187887191772, "learning_rate": 0.00016080133555926545, "loss": 1.3529, "step": 593 }, { "epoch": 0.08733688660172763, "grad_norm": 0.5949079394340515, "learning_rate": 0.00016073455759599333, "loss": 1.5085, "step": 594 }, { "epoch": 0.08748391839735342, "grad_norm": 0.7042183876037598, "learning_rate": 0.00016066777963272123, "loss": 1.3067, "step": 595 }, { "epoch": 0.08763095019297923, "grad_norm": 0.8088086843490601, "learning_rate": 0.0001606010016694491, "loss": 1.0476, "step": 596 }, { "epoch": 0.08777798198860504, "grad_norm": 0.5127574801445007, "learning_rate": 0.00016053422370617697, "loss": 1.2757, "step": 597 }, { "epoch": 0.08792501378423084, "grad_norm": 0.4990959167480469, "learning_rate": 0.00016046744574290485, "loss": 1.4899, "step": 598 }, { "epoch": 0.08807204557985665, "grad_norm": 0.6102316379547119, "learning_rate": 0.00016040066777963272, "loss": 1.3145, "step": 599 }, { "epoch": 0.08821907737548244, "grad_norm": 0.6646149158477783, "learning_rate": 0.00016033388981636062, "loss": 0.9433, "step": 600 }, { "epoch": 0.08836610917110825, "grad_norm": 0.5027661919593811, "learning_rate": 0.0001602671118530885, "loss": 1.2636, "step": 601 }, { "epoch": 0.08851314096673406, "grad_norm": 0.5804199576377869, "learning_rate": 0.00016020033388981637, "loss": 1.4214, "step": 602 }, { "epoch": 0.08866017276235987, "grad_norm": 0.6491842269897461, "learning_rate": 0.00016013355592654424, "loss": 1.2557, "step": 603 }, { "epoch": 0.08880720455798566, "grad_norm": 0.4192531406879425, "learning_rate": 0.0001600667779632721, "loss": 1.7915, "step": 604 }, { "epoch": 0.08895423635361147, "grad_norm": 0.6963093876838684, "learning_rate": 0.00016, "loss": 1.1658, "step": 605 }, { "epoch": 0.08910126814923727, "grad_norm": 0.6218084692955017, "learning_rate": 0.00015993322203672789, "loss": 1.3154, "step": 606 }, { "epoch": 0.08924829994486308, "grad_norm": 0.4127342104911804, "learning_rate": 0.00015986644407345576, "loss": 1.5402, "step": 607 }, { "epoch": 0.08939533174048889, "grad_norm": 0.7493715286254883, "learning_rate": 0.00015979966611018363, "loss": 1.1, "step": 608 }, { "epoch": 0.08954236353611468, "grad_norm": 0.46376389265060425, "learning_rate": 0.0001597328881469115, "loss": 1.7722, "step": 609 }, { "epoch": 0.08968939533174049, "grad_norm": 0.40699633955955505, "learning_rate": 0.0001596661101836394, "loss": 1.8231, "step": 610 }, { "epoch": 0.0898364271273663, "grad_norm": 0.39903053641319275, "learning_rate": 0.00015959933222036728, "loss": 1.6128, "step": 611 }, { "epoch": 0.0899834589229921, "grad_norm": 0.583209753036499, "learning_rate": 0.00015953255425709518, "loss": 1.2414, "step": 612 }, { "epoch": 0.0901304907186179, "grad_norm": 0.7532734274864197, "learning_rate": 0.00015946577629382305, "loss": 1.3373, "step": 613 }, { "epoch": 0.0902775225142437, "grad_norm": 0.4871058166027069, "learning_rate": 0.00015939899833055093, "loss": 1.1426, "step": 614 }, { "epoch": 0.09042455430986951, "grad_norm": 0.6422872543334961, "learning_rate": 0.0001593322203672788, "loss": 1.0983, "step": 615 }, { "epoch": 0.09057158610549532, "grad_norm": 0.5069561004638672, "learning_rate": 0.0001592654424040067, "loss": 1.1273, "step": 616 }, { "epoch": 0.09071861790112112, "grad_norm": 0.6385093331336975, "learning_rate": 0.00015919866444073457, "loss": 1.3715, "step": 617 }, { "epoch": 0.09086564969674692, "grad_norm": 0.5059258341789246, "learning_rate": 0.00015913188647746245, "loss": 1.134, "step": 618 }, { "epoch": 0.09101268149237272, "grad_norm": 0.6410172581672668, "learning_rate": 0.00015906510851419032, "loss": 1.2382, "step": 619 }, { "epoch": 0.09115971328799853, "grad_norm": 0.4717838168144226, "learning_rate": 0.0001589983305509182, "loss": 1.4687, "step": 620 }, { "epoch": 0.09130674508362434, "grad_norm": 0.5636394023895264, "learning_rate": 0.0001589315525876461, "loss": 1.2162, "step": 621 }, { "epoch": 0.09145377687925013, "grad_norm": 0.4774598181247711, "learning_rate": 0.00015886477462437397, "loss": 1.6575, "step": 622 }, { "epoch": 0.09160080867487594, "grad_norm": 0.46673914790153503, "learning_rate": 0.00015879799666110184, "loss": 1.2408, "step": 623 }, { "epoch": 0.09174784047050175, "grad_norm": 0.6798071265220642, "learning_rate": 0.0001587312186978297, "loss": 1.3629, "step": 624 }, { "epoch": 0.09189487226612755, "grad_norm": 0.45843780040740967, "learning_rate": 0.00015866444073455758, "loss": 1.392, "step": 625 }, { "epoch": 0.09204190406175336, "grad_norm": 0.5451468229293823, "learning_rate": 0.00015859766277128548, "loss": 1.1586, "step": 626 }, { "epoch": 0.09218893585737915, "grad_norm": 0.8034792542457581, "learning_rate": 0.00015853088480801336, "loss": 1.0205, "step": 627 }, { "epoch": 0.09233596765300496, "grad_norm": 0.704975426197052, "learning_rate": 0.00015846410684474123, "loss": 0.9977, "step": 628 }, { "epoch": 0.09248299944863077, "grad_norm": 0.43815019726753235, "learning_rate": 0.00015839732888146913, "loss": 1.0557, "step": 629 }, { "epoch": 0.09263003124425657, "grad_norm": 0.7682969570159912, "learning_rate": 0.000158330550918197, "loss": 1.1262, "step": 630 }, { "epoch": 0.09277706303988237, "grad_norm": 0.4929516017436981, "learning_rate": 0.00015826377295492488, "loss": 1.3849, "step": 631 }, { "epoch": 0.09292409483550818, "grad_norm": 0.5096595287322998, "learning_rate": 0.00015819699499165278, "loss": 1.2274, "step": 632 }, { "epoch": 0.09307112663113398, "grad_norm": 0.7020508646965027, "learning_rate": 0.00015813021702838065, "loss": 1.4217, "step": 633 }, { "epoch": 0.09321815842675979, "grad_norm": 0.415518194437027, "learning_rate": 0.00015806343906510852, "loss": 1.8826, "step": 634 }, { "epoch": 0.0933651902223856, "grad_norm": 0.6651912331581116, "learning_rate": 0.0001579966611018364, "loss": 1.1566, "step": 635 }, { "epoch": 0.09351222201801139, "grad_norm": 0.46642985939979553, "learning_rate": 0.00015792988313856427, "loss": 1.5473, "step": 636 }, { "epoch": 0.0936592538136372, "grad_norm": 0.5820549726486206, "learning_rate": 0.00015786310517529217, "loss": 1.2693, "step": 637 }, { "epoch": 0.093806285609263, "grad_norm": 0.5801737308502197, "learning_rate": 0.00015779632721202004, "loss": 0.9255, "step": 638 }, { "epoch": 0.09395331740488881, "grad_norm": 0.600013792514801, "learning_rate": 0.00015772954924874792, "loss": 1.3387, "step": 639 }, { "epoch": 0.0941003492005146, "grad_norm": 0.5010610222816467, "learning_rate": 0.0001576627712854758, "loss": 1.3351, "step": 640 }, { "epoch": 0.09424738099614041, "grad_norm": 0.4180939495563507, "learning_rate": 0.00015759599332220366, "loss": 1.2966, "step": 641 }, { "epoch": 0.09439441279176622, "grad_norm": 0.4787381887435913, "learning_rate": 0.00015752921535893156, "loss": 1.733, "step": 642 }, { "epoch": 0.09454144458739203, "grad_norm": 0.46225428581237793, "learning_rate": 0.00015746243739565944, "loss": 1.2075, "step": 643 }, { "epoch": 0.09468847638301783, "grad_norm": 0.5345350503921509, "learning_rate": 0.0001573956594323873, "loss": 1.3557, "step": 644 }, { "epoch": 0.09483550817864363, "grad_norm": 0.44449546933174133, "learning_rate": 0.0001573288814691152, "loss": 1.766, "step": 645 }, { "epoch": 0.09498253997426943, "grad_norm": 0.5039743781089783, "learning_rate": 0.00015726210350584308, "loss": 1.2092, "step": 646 }, { "epoch": 0.09512957176989524, "grad_norm": 0.6591314673423767, "learning_rate": 0.00015719532554257096, "loss": 1.2992, "step": 647 }, { "epoch": 0.09527660356552105, "grad_norm": 0.4307759702205658, "learning_rate": 0.00015712854757929886, "loss": 1.2763, "step": 648 }, { "epoch": 0.09542363536114684, "grad_norm": 0.43409353494644165, "learning_rate": 0.00015706176961602673, "loss": 1.7056, "step": 649 }, { "epoch": 0.09557066715677265, "grad_norm": 0.6908676624298096, "learning_rate": 0.0001569949916527546, "loss": 1.8502, "step": 650 }, { "epoch": 0.09571769895239846, "grad_norm": 0.5742613673210144, "learning_rate": 0.00015692821368948248, "loss": 1.2255, "step": 651 }, { "epoch": 0.09586473074802426, "grad_norm": 0.5643760561943054, "learning_rate": 0.00015686143572621035, "loss": 0.9409, "step": 652 }, { "epoch": 0.09601176254365007, "grad_norm": 0.6045966744422913, "learning_rate": 0.00015679465776293825, "loss": 1.2363, "step": 653 }, { "epoch": 0.09615879433927586, "grad_norm": 0.48991382122039795, "learning_rate": 0.00015672787979966612, "loss": 1.2591, "step": 654 }, { "epoch": 0.09630582613490167, "grad_norm": 0.4303016662597656, "learning_rate": 0.000156661101836394, "loss": 0.9655, "step": 655 }, { "epoch": 0.09645285793052748, "grad_norm": 0.6320937871932983, "learning_rate": 0.00015659432387312187, "loss": 1.1696, "step": 656 }, { "epoch": 0.09659988972615328, "grad_norm": 0.43597787618637085, "learning_rate": 0.00015652754590984974, "loss": 1.1204, "step": 657 }, { "epoch": 0.09674692152177908, "grad_norm": 0.608848512172699, "learning_rate": 0.00015646076794657764, "loss": 1.411, "step": 658 }, { "epoch": 0.09689395331740489, "grad_norm": 0.46236202120780945, "learning_rate": 0.00015639398998330552, "loss": 1.398, "step": 659 }, { "epoch": 0.09704098511303069, "grad_norm": 0.5226595401763916, "learning_rate": 0.0001563272120200334, "loss": 1.2486, "step": 660 }, { "epoch": 0.0971880169086565, "grad_norm": 0.5582869052886963, "learning_rate": 0.00015626043405676126, "loss": 1.1966, "step": 661 }, { "epoch": 0.0973350487042823, "grad_norm": 0.5684506893157959, "learning_rate": 0.00015619365609348916, "loss": 1.1202, "step": 662 }, { "epoch": 0.0974820804999081, "grad_norm": 0.6276257634162903, "learning_rate": 0.00015612687813021704, "loss": 1.2578, "step": 663 }, { "epoch": 0.0976291122955339, "grad_norm": 0.5350037813186646, "learning_rate": 0.00015606010016694494, "loss": 1.4939, "step": 664 }, { "epoch": 0.09777614409115971, "grad_norm": 0.7043134570121765, "learning_rate": 0.0001559933222036728, "loss": 1.6236, "step": 665 }, { "epoch": 0.09792317588678552, "grad_norm": 0.5847034454345703, "learning_rate": 0.00015592654424040068, "loss": 1.5448, "step": 666 }, { "epoch": 0.09807020768241133, "grad_norm": 0.46139127016067505, "learning_rate": 0.00015585976627712856, "loss": 1.9231, "step": 667 }, { "epoch": 0.09821723947803712, "grad_norm": 0.6011927127838135, "learning_rate": 0.00015579298831385643, "loss": 1.2604, "step": 668 }, { "epoch": 0.09836427127366293, "grad_norm": 0.5749934911727905, "learning_rate": 0.00015572621035058433, "loss": 1.4123, "step": 669 }, { "epoch": 0.09851130306928874, "grad_norm": 0.5384885668754578, "learning_rate": 0.0001556594323873122, "loss": 1.1751, "step": 670 }, { "epoch": 0.09865833486491454, "grad_norm": 0.45651814341545105, "learning_rate": 0.00015559265442404007, "loss": 1.6448, "step": 671 }, { "epoch": 0.09880536666054034, "grad_norm": 0.6385384798049927, "learning_rate": 0.00015552587646076795, "loss": 1.4721, "step": 672 }, { "epoch": 0.09895239845616614, "grad_norm": 0.626849353313446, "learning_rate": 0.00015545909849749582, "loss": 1.0771, "step": 673 }, { "epoch": 0.09909943025179195, "grad_norm": 0.47956568002700806, "learning_rate": 0.00015539232053422372, "loss": 1.3684, "step": 674 }, { "epoch": 0.09924646204741776, "grad_norm": 0.5215425491333008, "learning_rate": 0.0001553255425709516, "loss": 1.204, "step": 675 }, { "epoch": 0.09939349384304356, "grad_norm": 0.46913138031959534, "learning_rate": 0.00015525876460767947, "loss": 1.3675, "step": 676 }, { "epoch": 0.09954052563866936, "grad_norm": 0.44034120440483093, "learning_rate": 0.00015519198664440734, "loss": 1.6767, "step": 677 }, { "epoch": 0.09968755743429517, "grad_norm": 0.6659665107727051, "learning_rate": 0.00015512520868113521, "loss": 0.9156, "step": 678 }, { "epoch": 0.09983458922992097, "grad_norm": 0.6420289278030396, "learning_rate": 0.00015505843071786311, "loss": 1.2903, "step": 679 }, { "epoch": 0.09998162102554678, "grad_norm": 0.7700021266937256, "learning_rate": 0.000154991652754591, "loss": 1.0531, "step": 680 }, { "epoch": 0.10012865282117257, "grad_norm": 0.5232059359550476, "learning_rate": 0.0001549248747913189, "loss": 1.3923, "step": 681 }, { "epoch": 0.10027568461679838, "grad_norm": 0.5167177319526672, "learning_rate": 0.00015485809682804676, "loss": 1.3116, "step": 682 }, { "epoch": 0.10042271641242419, "grad_norm": 0.5574501752853394, "learning_rate": 0.00015479131886477463, "loss": 1.1033, "step": 683 }, { "epoch": 0.10056974820805, "grad_norm": 0.45674586296081543, "learning_rate": 0.0001547245409015025, "loss": 1.2536, "step": 684 }, { "epoch": 0.1007167800036758, "grad_norm": 0.6583337783813477, "learning_rate": 0.0001546577629382304, "loss": 1.1964, "step": 685 }, { "epoch": 0.1008638117993016, "grad_norm": 0.5463740825653076, "learning_rate": 0.00015459098497495828, "loss": 1.4065, "step": 686 }, { "epoch": 0.1010108435949274, "grad_norm": 0.6360630393028259, "learning_rate": 0.00015452420701168615, "loss": 1.2217, "step": 687 }, { "epoch": 0.10115787539055321, "grad_norm": 0.4514288604259491, "learning_rate": 0.00015445742904841403, "loss": 1.3425, "step": 688 }, { "epoch": 0.10130490718617902, "grad_norm": 0.5193662643432617, "learning_rate": 0.0001543906510851419, "loss": 1.2223, "step": 689 }, { "epoch": 0.10145193898180481, "grad_norm": 0.48822471499443054, "learning_rate": 0.0001543238731218698, "loss": 1.1896, "step": 690 }, { "epoch": 0.10159897077743062, "grad_norm": 0.65366530418396, "learning_rate": 0.00015425709515859767, "loss": 1.1265, "step": 691 }, { "epoch": 0.10174600257305642, "grad_norm": 0.6045045852661133, "learning_rate": 0.00015419031719532555, "loss": 1.4774, "step": 692 }, { "epoch": 0.10189303436868223, "grad_norm": 0.44756537675857544, "learning_rate": 0.00015412353923205342, "loss": 1.8474, "step": 693 }, { "epoch": 0.10204006616430804, "grad_norm": 0.6567626595497131, "learning_rate": 0.0001540567612687813, "loss": 1.332, "step": 694 }, { "epoch": 0.10218709795993383, "grad_norm": 0.6473841667175293, "learning_rate": 0.0001539899833055092, "loss": 1.1196, "step": 695 }, { "epoch": 0.10233412975555964, "grad_norm": 0.5628229379653931, "learning_rate": 0.00015392320534223707, "loss": 1.1581, "step": 696 }, { "epoch": 0.10248116155118545, "grad_norm": 0.5379781723022461, "learning_rate": 0.00015385642737896494, "loss": 1.556, "step": 697 }, { "epoch": 0.10262819334681125, "grad_norm": 0.48810526728630066, "learning_rate": 0.00015378964941569284, "loss": 1.5018, "step": 698 }, { "epoch": 0.10277522514243705, "grad_norm": 0.40852096676826477, "learning_rate": 0.0001537228714524207, "loss": 1.4647, "step": 699 }, { "epoch": 0.10292225693806285, "grad_norm": 0.7071953415870667, "learning_rate": 0.00015365609348914859, "loss": 1.1792, "step": 700 }, { "epoch": 0.10306928873368866, "grad_norm": 0.6942100524902344, "learning_rate": 0.00015358931552587649, "loss": 1.1759, "step": 701 }, { "epoch": 0.10321632052931447, "grad_norm": 0.5391016602516174, "learning_rate": 0.00015352253756260436, "loss": 1.491, "step": 702 }, { "epoch": 0.10336335232494027, "grad_norm": 0.44915392994880676, "learning_rate": 0.00015345575959933223, "loss": 1.6336, "step": 703 }, { "epoch": 0.10351038412056607, "grad_norm": 0.38828638195991516, "learning_rate": 0.0001533889816360601, "loss": 1.3726, "step": 704 }, { "epoch": 0.10365741591619188, "grad_norm": 0.4992864727973938, "learning_rate": 0.00015332220367278798, "loss": 1.5914, "step": 705 }, { "epoch": 0.10380444771181768, "grad_norm": 0.3929794728755951, "learning_rate": 0.00015325542570951588, "loss": 1.2501, "step": 706 }, { "epoch": 0.10395147950744349, "grad_norm": 0.4103076159954071, "learning_rate": 0.00015318864774624375, "loss": 1.234, "step": 707 }, { "epoch": 0.10409851130306928, "grad_norm": 0.43139058351516724, "learning_rate": 0.00015312186978297163, "loss": 1.3546, "step": 708 }, { "epoch": 0.10424554309869509, "grad_norm": 0.35200977325439453, "learning_rate": 0.0001530550918196995, "loss": 1.07, "step": 709 }, { "epoch": 0.1043925748943209, "grad_norm": 0.6628250479698181, "learning_rate": 0.00015298831385642737, "loss": 0.9454, "step": 710 }, { "epoch": 0.1045396066899467, "grad_norm": 0.5189528465270996, "learning_rate": 0.00015292153589315527, "loss": 1.3778, "step": 711 }, { "epoch": 0.10468663848557251, "grad_norm": 0.559195876121521, "learning_rate": 0.00015285475792988315, "loss": 1.2308, "step": 712 }, { "epoch": 0.1048336702811983, "grad_norm": 0.4907131791114807, "learning_rate": 0.00015278797996661102, "loss": 1.0122, "step": 713 }, { "epoch": 0.10498070207682411, "grad_norm": 0.6189465522766113, "learning_rate": 0.0001527212020033389, "loss": 1.3823, "step": 714 }, { "epoch": 0.10512773387244992, "grad_norm": 0.5016515851020813, "learning_rate": 0.0001526544240400668, "loss": 1.5659, "step": 715 }, { "epoch": 0.10527476566807573, "grad_norm": 0.5774978399276733, "learning_rate": 0.00015258764607679466, "loss": 1.1925, "step": 716 }, { "epoch": 0.10542179746370152, "grad_norm": 0.5959893465042114, "learning_rate": 0.00015252086811352257, "loss": 1.1662, "step": 717 }, { "epoch": 0.10556882925932733, "grad_norm": 0.7381499409675598, "learning_rate": 0.00015245409015025044, "loss": 1.032, "step": 718 }, { "epoch": 0.10571586105495313, "grad_norm": 0.5526353716850281, "learning_rate": 0.0001523873121869783, "loss": 1.4882, "step": 719 }, { "epoch": 0.10586289285057894, "grad_norm": 0.43210935592651367, "learning_rate": 0.00015232053422370618, "loss": 1.251, "step": 720 }, { "epoch": 0.10600992464620475, "grad_norm": 0.589648425579071, "learning_rate": 0.00015225375626043406, "loss": 1.7402, "step": 721 }, { "epoch": 0.10615695644183054, "grad_norm": 1.2557355165481567, "learning_rate": 0.00015218697829716196, "loss": 1.1224, "step": 722 }, { "epoch": 0.10630398823745635, "grad_norm": 0.6244363188743591, "learning_rate": 0.00015212020033388983, "loss": 1.0016, "step": 723 }, { "epoch": 0.10645102003308216, "grad_norm": 0.5662913918495178, "learning_rate": 0.0001520534223706177, "loss": 0.9124, "step": 724 }, { "epoch": 0.10659805182870796, "grad_norm": 0.5600042343139648, "learning_rate": 0.00015198664440734558, "loss": 1.1562, "step": 725 }, { "epoch": 0.10674508362433376, "grad_norm": 0.5924863815307617, "learning_rate": 0.00015191986644407345, "loss": 1.1697, "step": 726 }, { "epoch": 0.10689211541995956, "grad_norm": 0.4873445928096771, "learning_rate": 0.00015185308848080135, "loss": 1.3554, "step": 727 }, { "epoch": 0.10703914721558537, "grad_norm": 0.4491806626319885, "learning_rate": 0.00015178631051752922, "loss": 1.7225, "step": 728 }, { "epoch": 0.10718617901121118, "grad_norm": 0.5205637812614441, "learning_rate": 0.0001517195325542571, "loss": 1.2141, "step": 729 }, { "epoch": 0.10733321080683698, "grad_norm": 0.6513786911964417, "learning_rate": 0.00015165275459098497, "loss": 1.376, "step": 730 }, { "epoch": 0.10748024260246278, "grad_norm": 0.624433159828186, "learning_rate": 0.00015158597662771284, "loss": 1.2061, "step": 731 }, { "epoch": 0.10762727439808858, "grad_norm": 0.419197142124176, "learning_rate": 0.00015151919866444074, "loss": 1.3063, "step": 732 }, { "epoch": 0.10777430619371439, "grad_norm": 0.4165155291557312, "learning_rate": 0.00015145242070116862, "loss": 1.1529, "step": 733 }, { "epoch": 0.1079213379893402, "grad_norm": 0.569884181022644, "learning_rate": 0.00015138564273789652, "loss": 1.4072, "step": 734 }, { "epoch": 0.10806836978496599, "grad_norm": 0.41433191299438477, "learning_rate": 0.0001513188647746244, "loss": 0.7399, "step": 735 }, { "epoch": 0.1082154015805918, "grad_norm": 0.6291340589523315, "learning_rate": 0.00015125208681135226, "loss": 1.2894, "step": 736 }, { "epoch": 0.1083624333762176, "grad_norm": 0.5361130237579346, "learning_rate": 0.00015118530884808014, "loss": 1.2269, "step": 737 }, { "epoch": 0.10850946517184341, "grad_norm": 0.6141095757484436, "learning_rate": 0.00015111853088480804, "loss": 1.4044, "step": 738 }, { "epoch": 0.10865649696746922, "grad_norm": 0.6296900510787964, "learning_rate": 0.0001510517529215359, "loss": 1.1991, "step": 739 }, { "epoch": 0.10880352876309501, "grad_norm": 0.43720030784606934, "learning_rate": 0.00015098497495826378, "loss": 1.2246, "step": 740 }, { "epoch": 0.10895056055872082, "grad_norm": 0.5120818018913269, "learning_rate": 0.00015091819699499166, "loss": 1.1351, "step": 741 }, { "epoch": 0.10909759235434663, "grad_norm": 0.6182043552398682, "learning_rate": 0.00015085141903171953, "loss": 1.3016, "step": 742 }, { "epoch": 0.10924462414997244, "grad_norm": 0.4294649362564087, "learning_rate": 0.00015078464106844743, "loss": 1.7865, "step": 743 }, { "epoch": 0.10939165594559823, "grad_norm": 0.4663722515106201, "learning_rate": 0.0001507178631051753, "loss": 1.793, "step": 744 }, { "epoch": 0.10953868774122404, "grad_norm": 0.4685133993625641, "learning_rate": 0.00015065108514190318, "loss": 1.2504, "step": 745 }, { "epoch": 0.10968571953684984, "grad_norm": 0.48968514800071716, "learning_rate": 0.00015058430717863105, "loss": 1.4785, "step": 746 }, { "epoch": 0.10983275133247565, "grad_norm": 0.45944273471832275, "learning_rate": 0.00015051752921535892, "loss": 1.4569, "step": 747 }, { "epoch": 0.10997978312810146, "grad_norm": 0.6717365980148315, "learning_rate": 0.0001504507512520868, "loss": 1.1552, "step": 748 }, { "epoch": 0.11012681492372725, "grad_norm": 0.4457140266895294, "learning_rate": 0.0001503839732888147, "loss": 1.2675, "step": 749 }, { "epoch": 0.11027384671935306, "grad_norm": 0.6207507252693176, "learning_rate": 0.00015031719532554257, "loss": 1.2078, "step": 750 }, { "epoch": 0.11042087851497887, "grad_norm": 0.45481055974960327, "learning_rate": 0.00015025041736227047, "loss": 1.2123, "step": 751 }, { "epoch": 0.11056791031060467, "grad_norm": 0.5013782978057861, "learning_rate": 0.00015018363939899834, "loss": 1.076, "step": 752 }, { "epoch": 0.11071494210623047, "grad_norm": 0.7226582765579224, "learning_rate": 0.00015011686143572622, "loss": 0.9882, "step": 753 }, { "epoch": 0.11086197390185627, "grad_norm": 0.491376131772995, "learning_rate": 0.00015005008347245412, "loss": 1.2969, "step": 754 }, { "epoch": 0.11100900569748208, "grad_norm": 0.6381293535232544, "learning_rate": 0.000149983305509182, "loss": 1.0952, "step": 755 }, { "epoch": 0.11115603749310789, "grad_norm": 0.5294733643531799, "learning_rate": 0.00014991652754590986, "loss": 0.8462, "step": 756 }, { "epoch": 0.1113030692887337, "grad_norm": 0.5914049744606018, "learning_rate": 0.00014984974958263774, "loss": 1.0333, "step": 757 }, { "epoch": 0.11145010108435949, "grad_norm": 0.5034297108650208, "learning_rate": 0.0001497829716193656, "loss": 1.3188, "step": 758 }, { "epoch": 0.1115971328799853, "grad_norm": 0.396068811416626, "learning_rate": 0.0001497161936560935, "loss": 1.2218, "step": 759 }, { "epoch": 0.1117441646756111, "grad_norm": 0.5489635467529297, "learning_rate": 0.00014964941569282138, "loss": 1.1101, "step": 760 }, { "epoch": 0.11189119647123691, "grad_norm": 0.49752339720726013, "learning_rate": 0.00014958263772954926, "loss": 1.4655, "step": 761 }, { "epoch": 0.1120382282668627, "grad_norm": 0.5310470461845398, "learning_rate": 0.00014951585976627713, "loss": 1.1117, "step": 762 }, { "epoch": 0.11218526006248851, "grad_norm": 0.5919608473777771, "learning_rate": 0.000149449081803005, "loss": 1.3553, "step": 763 }, { "epoch": 0.11233229185811432, "grad_norm": 0.6174759864807129, "learning_rate": 0.00014938230383973287, "loss": 1.5342, "step": 764 }, { "epoch": 0.11247932365374012, "grad_norm": 0.6445675492286682, "learning_rate": 0.00014931552587646077, "loss": 1.1189, "step": 765 }, { "epoch": 0.11262635544936593, "grad_norm": 0.5434446930885315, "learning_rate": 0.00014924874791318865, "loss": 1.0057, "step": 766 }, { "epoch": 0.11277338724499172, "grad_norm": 0.573020339012146, "learning_rate": 0.00014918196994991652, "loss": 1.0097, "step": 767 }, { "epoch": 0.11292041904061753, "grad_norm": 0.5362363457679749, "learning_rate": 0.00014911519198664442, "loss": 1.1168, "step": 768 }, { "epoch": 0.11306745083624334, "grad_norm": 0.6165186166763306, "learning_rate": 0.0001490484140233723, "loss": 1.1217, "step": 769 }, { "epoch": 0.11321448263186915, "grad_norm": 0.48349079489707947, "learning_rate": 0.0001489816360601002, "loss": 1.3877, "step": 770 }, { "epoch": 0.11336151442749495, "grad_norm": 0.7998025417327881, "learning_rate": 0.00014891485809682807, "loss": 1.2927, "step": 771 }, { "epoch": 0.11350854622312075, "grad_norm": 0.5059000849723816, "learning_rate": 0.00014884808013355594, "loss": 1.4661, "step": 772 }, { "epoch": 0.11365557801874655, "grad_norm": 0.5860148072242737, "learning_rate": 0.00014878130217028381, "loss": 1.345, "step": 773 }, { "epoch": 0.11380260981437236, "grad_norm": 0.6572959423065186, "learning_rate": 0.0001487145242070117, "loss": 1.2979, "step": 774 }, { "epoch": 0.11394964160999817, "grad_norm": 0.6161307692527771, "learning_rate": 0.0001486477462437396, "loss": 1.1127, "step": 775 }, { "epoch": 0.11409667340562396, "grad_norm": 0.5237761735916138, "learning_rate": 0.00014858096828046746, "loss": 1.051, "step": 776 }, { "epoch": 0.11424370520124977, "grad_norm": 0.5981360077857971, "learning_rate": 0.00014851419031719533, "loss": 1.5189, "step": 777 }, { "epoch": 0.11439073699687557, "grad_norm": 0.5877708196640015, "learning_rate": 0.0001484474123539232, "loss": 1.3694, "step": 778 }, { "epoch": 0.11453776879250138, "grad_norm": 0.4468958377838135, "learning_rate": 0.00014838063439065108, "loss": 1.8201, "step": 779 }, { "epoch": 0.11468480058812719, "grad_norm": 0.6049508452415466, "learning_rate": 0.00014831385642737895, "loss": 1.4189, "step": 780 }, { "epoch": 0.11483183238375298, "grad_norm": 0.589998722076416, "learning_rate": 0.00014824707846410685, "loss": 1.4226, "step": 781 }, { "epoch": 0.11497886417937879, "grad_norm": 0.4943135976791382, "learning_rate": 0.00014818030050083473, "loss": 1.2337, "step": 782 }, { "epoch": 0.1151258959750046, "grad_norm": 0.7313138842582703, "learning_rate": 0.0001481135225375626, "loss": 1.0656, "step": 783 }, { "epoch": 0.1152729277706304, "grad_norm": 0.5975912809371948, "learning_rate": 0.00014804674457429047, "loss": 1.5249, "step": 784 }, { "epoch": 0.1154199595662562, "grad_norm": 0.5475925803184509, "learning_rate": 0.00014797996661101837, "loss": 1.4819, "step": 785 }, { "epoch": 0.115566991361882, "grad_norm": 0.5659377574920654, "learning_rate": 0.00014791318864774625, "loss": 1.1707, "step": 786 }, { "epoch": 0.11571402315750781, "grad_norm": 0.8224787712097168, "learning_rate": 0.00014784641068447415, "loss": 1.0305, "step": 787 }, { "epoch": 0.11586105495313362, "grad_norm": 0.49119213223457336, "learning_rate": 0.00014777963272120202, "loss": 1.1997, "step": 788 }, { "epoch": 0.11600808674875943, "grad_norm": 0.5221545100212097, "learning_rate": 0.0001477128547579299, "loss": 1.2311, "step": 789 }, { "epoch": 0.11615511854438522, "grad_norm": 0.5897625088691711, "learning_rate": 0.00014764607679465777, "loss": 1.3297, "step": 790 }, { "epoch": 0.11630215034001103, "grad_norm": 1.1512343883514404, "learning_rate": 0.00014757929883138567, "loss": 0.9149, "step": 791 }, { "epoch": 0.11644918213563683, "grad_norm": 0.5175444483757019, "learning_rate": 0.00014751252086811354, "loss": 1.2534, "step": 792 }, { "epoch": 0.11659621393126264, "grad_norm": 0.5744941234588623, "learning_rate": 0.0001474457429048414, "loss": 1.1329, "step": 793 }, { "epoch": 0.11674324572688843, "grad_norm": 0.6011309623718262, "learning_rate": 0.00014737896494156929, "loss": 1.0114, "step": 794 }, { "epoch": 0.11689027752251424, "grad_norm": 0.6664937138557434, "learning_rate": 0.00014731218697829716, "loss": 1.1738, "step": 795 }, { "epoch": 0.11703730931814005, "grad_norm": 0.7699421048164368, "learning_rate": 0.00014724540901502506, "loss": 1.1134, "step": 796 }, { "epoch": 0.11718434111376586, "grad_norm": 0.7136440873146057, "learning_rate": 0.00014717863105175293, "loss": 0.9301, "step": 797 }, { "epoch": 0.11733137290939166, "grad_norm": 0.6351363062858582, "learning_rate": 0.0001471118530884808, "loss": 0.9044, "step": 798 }, { "epoch": 0.11747840470501746, "grad_norm": 0.475035697221756, "learning_rate": 0.00014704507512520868, "loss": 1.8611, "step": 799 }, { "epoch": 0.11762543650064326, "grad_norm": 0.66331946849823, "learning_rate": 0.00014697829716193655, "loss": 0.9928, "step": 800 }, { "epoch": 0.11777246829626907, "grad_norm": 0.6757659316062927, "learning_rate": 0.00014691151919866443, "loss": 1.1414, "step": 801 }, { "epoch": 0.11791950009189488, "grad_norm": 0.49898049235343933, "learning_rate": 0.00014684474123539233, "loss": 1.589, "step": 802 }, { "epoch": 0.11806653188752067, "grad_norm": 0.7410119771957397, "learning_rate": 0.0001467779632721202, "loss": 0.8837, "step": 803 }, { "epoch": 0.11821356368314648, "grad_norm": 0.6845124959945679, "learning_rate": 0.0001467111853088481, "loss": 1.1766, "step": 804 }, { "epoch": 0.11836059547877228, "grad_norm": 0.4272838532924652, "learning_rate": 0.00014664440734557597, "loss": 1.3503, "step": 805 }, { "epoch": 0.11850762727439809, "grad_norm": 0.6288057565689087, "learning_rate": 0.00014657762938230385, "loss": 1.2255, "step": 806 }, { "epoch": 0.1186546590700239, "grad_norm": 0.4245305061340332, "learning_rate": 0.00014651085141903175, "loss": 1.3904, "step": 807 }, { "epoch": 0.11880169086564969, "grad_norm": 0.4360218942165375, "learning_rate": 0.00014644407345575962, "loss": 1.2633, "step": 808 }, { "epoch": 0.1189487226612755, "grad_norm": 0.7621588706970215, "learning_rate": 0.0001463772954924875, "loss": 0.8696, "step": 809 }, { "epoch": 0.1190957544569013, "grad_norm": 0.6901448965072632, "learning_rate": 0.00014631051752921536, "loss": 1.2726, "step": 810 }, { "epoch": 0.11924278625252711, "grad_norm": 0.5166795253753662, "learning_rate": 0.00014624373956594324, "loss": 1.007, "step": 811 }, { "epoch": 0.11938981804815291, "grad_norm": 0.6011196374893188, "learning_rate": 0.00014617696160267114, "loss": 1.4274, "step": 812 }, { "epoch": 0.11953684984377871, "grad_norm": 0.5435547828674316, "learning_rate": 0.000146110183639399, "loss": 1.2847, "step": 813 }, { "epoch": 0.11968388163940452, "grad_norm": 0.6570006012916565, "learning_rate": 0.00014604340567612688, "loss": 1.1201, "step": 814 }, { "epoch": 0.11983091343503033, "grad_norm": 0.7304107546806335, "learning_rate": 0.00014597662771285476, "loss": 0.8767, "step": 815 }, { "epoch": 0.11997794523065614, "grad_norm": 0.6253023147583008, "learning_rate": 0.00014590984974958263, "loss": 1.4602, "step": 816 }, { "epoch": 0.12012497702628193, "grad_norm": 0.6119077205657959, "learning_rate": 0.0001458430717863105, "loss": 1.094, "step": 817 }, { "epoch": 0.12027200882190774, "grad_norm": 0.430504709482193, "learning_rate": 0.0001457762938230384, "loss": 0.9678, "step": 818 }, { "epoch": 0.12041904061753354, "grad_norm": 0.5409938097000122, "learning_rate": 0.00014570951585976628, "loss": 1.3086, "step": 819 }, { "epoch": 0.12056607241315935, "grad_norm": 0.5546008944511414, "learning_rate": 0.00014564273789649415, "loss": 0.9129, "step": 820 }, { "epoch": 0.12071310420878514, "grad_norm": 0.5367364883422852, "learning_rate": 0.00014557595993322205, "loss": 1.2793, "step": 821 }, { "epoch": 0.12086013600441095, "grad_norm": 0.41216298937797546, "learning_rate": 0.00014550918196994992, "loss": 1.0369, "step": 822 }, { "epoch": 0.12100716780003676, "grad_norm": 0.7783049941062927, "learning_rate": 0.00014544240400667782, "loss": 1.0701, "step": 823 }, { "epoch": 0.12115419959566257, "grad_norm": 0.4086431562900543, "learning_rate": 0.0001453756260434057, "loss": 1.662, "step": 824 }, { "epoch": 0.12130123139128837, "grad_norm": 0.6304144263267517, "learning_rate": 0.00014530884808013357, "loss": 1.106, "step": 825 }, { "epoch": 0.12144826318691417, "grad_norm": 0.8506725430488586, "learning_rate": 0.00014524207011686144, "loss": 1.4108, "step": 826 }, { "epoch": 0.12159529498253997, "grad_norm": 0.5488341450691223, "learning_rate": 0.00014517529215358932, "loss": 1.5488, "step": 827 }, { "epoch": 0.12174232677816578, "grad_norm": 0.5151952505111694, "learning_rate": 0.00014510851419031722, "loss": 1.4245, "step": 828 }, { "epoch": 0.12188935857379159, "grad_norm": 0.7420411109924316, "learning_rate": 0.0001450417362270451, "loss": 1.3031, "step": 829 }, { "epoch": 0.12203639036941738, "grad_norm": 0.7200042605400085, "learning_rate": 0.00014497495826377296, "loss": 0.8745, "step": 830 }, { "epoch": 0.12218342216504319, "grad_norm": 0.7255342602729797, "learning_rate": 0.00014490818030050084, "loss": 1.3494, "step": 831 }, { "epoch": 0.122330453960669, "grad_norm": 0.5927593111991882, "learning_rate": 0.0001448414023372287, "loss": 1.3136, "step": 832 }, { "epoch": 0.1224774857562948, "grad_norm": 0.4968041181564331, "learning_rate": 0.00014477462437395658, "loss": 1.4257, "step": 833 }, { "epoch": 0.12262451755192061, "grad_norm": 0.5780361294746399, "learning_rate": 0.00014470784641068448, "loss": 1.3216, "step": 834 }, { "epoch": 0.1227715493475464, "grad_norm": 0.5474163293838501, "learning_rate": 0.00014464106844741236, "loss": 1.1575, "step": 835 }, { "epoch": 0.12291858114317221, "grad_norm": 0.5380105376243591, "learning_rate": 0.00014457429048414023, "loss": 1.1252, "step": 836 }, { "epoch": 0.12306561293879802, "grad_norm": 0.6055735349655151, "learning_rate": 0.0001445075125208681, "loss": 1.1859, "step": 837 }, { "epoch": 0.12321264473442382, "grad_norm": 0.43329918384552, "learning_rate": 0.000144440734557596, "loss": 1.6037, "step": 838 }, { "epoch": 0.12335967653004962, "grad_norm": 0.753786563873291, "learning_rate": 0.00014437395659432388, "loss": 1.3935, "step": 839 }, { "epoch": 0.12350670832567542, "grad_norm": 0.6531333923339844, "learning_rate": 0.00014430717863105178, "loss": 1.3739, "step": 840 }, { "epoch": 0.12365374012130123, "grad_norm": 0.8071143627166748, "learning_rate": 0.00014424040066777965, "loss": 0.9471, "step": 841 }, { "epoch": 0.12380077191692704, "grad_norm": 0.4852818250656128, "learning_rate": 0.00014417362270450752, "loss": 1.2046, "step": 842 }, { "epoch": 0.12394780371255285, "grad_norm": 0.42448416352272034, "learning_rate": 0.0001441068447412354, "loss": 1.3317, "step": 843 }, { "epoch": 0.12409483550817864, "grad_norm": 0.4530593454837799, "learning_rate": 0.0001440400667779633, "loss": 1.4696, "step": 844 }, { "epoch": 0.12424186730380445, "grad_norm": 0.5867094397544861, "learning_rate": 0.00014397328881469117, "loss": 1.0139, "step": 845 }, { "epoch": 0.12438889909943025, "grad_norm": 0.5414953231811523, "learning_rate": 0.00014390651085141904, "loss": 1.4152, "step": 846 }, { "epoch": 0.12453593089505606, "grad_norm": 0.5484989285469055, "learning_rate": 0.00014383973288814692, "loss": 1.2765, "step": 847 }, { "epoch": 0.12468296269068185, "grad_norm": 0.7573115229606628, "learning_rate": 0.0001437729549248748, "loss": 1.4154, "step": 848 }, { "epoch": 0.12482999448630766, "grad_norm": 0.4389816224575043, "learning_rate": 0.00014370617696160266, "loss": 1.3386, "step": 849 }, { "epoch": 0.12497702628193347, "grad_norm": 0.7418072819709778, "learning_rate": 0.00014363939899833056, "loss": 1.0939, "step": 850 }, { "epoch": 0.12512405807755927, "grad_norm": 0.6662271022796631, "learning_rate": 0.00014357262103505844, "loss": 1.5436, "step": 851 }, { "epoch": 0.12527108987318508, "grad_norm": 0.6033502221107483, "learning_rate": 0.0001435058430717863, "loss": 1.1399, "step": 852 }, { "epoch": 0.1254181216688109, "grad_norm": 0.6707456111907959, "learning_rate": 0.00014343906510851418, "loss": 1.1557, "step": 853 }, { "epoch": 0.1255651534644367, "grad_norm": 0.377358615398407, "learning_rate": 0.00014337228714524205, "loss": 1.4793, "step": 854 }, { "epoch": 0.12571218526006248, "grad_norm": 0.5811101198196411, "learning_rate": 0.00014330550918196995, "loss": 1.1604, "step": 855 }, { "epoch": 0.12585921705568828, "grad_norm": 0.5319637060165405, "learning_rate": 0.00014323873121869783, "loss": 1.0821, "step": 856 }, { "epoch": 0.1260062488513141, "grad_norm": 0.6233212351799011, "learning_rate": 0.00014317195325542573, "loss": 1.3295, "step": 857 }, { "epoch": 0.1261532806469399, "grad_norm": 0.6596187949180603, "learning_rate": 0.0001431051752921536, "loss": 1.3465, "step": 858 }, { "epoch": 0.1263003124425657, "grad_norm": 0.663522481918335, "learning_rate": 0.00014303839732888147, "loss": 1.1052, "step": 859 }, { "epoch": 0.1264473442381915, "grad_norm": 0.6268435716629028, "learning_rate": 0.00014297161936560937, "loss": 1.4237, "step": 860 }, { "epoch": 0.12659437603381732, "grad_norm": 0.7019825577735901, "learning_rate": 0.00014290484140233725, "loss": 1.2048, "step": 861 }, { "epoch": 0.12674140782944313, "grad_norm": 0.4844971299171448, "learning_rate": 0.00014283806343906512, "loss": 1.3788, "step": 862 }, { "epoch": 0.12688843962506893, "grad_norm": 0.42103111743927, "learning_rate": 0.000142771285475793, "loss": 1.3265, "step": 863 }, { "epoch": 0.1270354714206947, "grad_norm": 0.4094342887401581, "learning_rate": 0.00014270450751252087, "loss": 1.4227, "step": 864 }, { "epoch": 0.12718250321632052, "grad_norm": 0.48595064878463745, "learning_rate": 0.00014263772954924874, "loss": 1.7664, "step": 865 }, { "epoch": 0.12732953501194633, "grad_norm": 0.4998999834060669, "learning_rate": 0.00014257095158597664, "loss": 1.3724, "step": 866 }, { "epoch": 0.12747656680757213, "grad_norm": 0.5115623474121094, "learning_rate": 0.00014250417362270451, "loss": 1.1154, "step": 867 }, { "epoch": 0.12762359860319794, "grad_norm": 0.4588727355003357, "learning_rate": 0.0001424373956594324, "loss": 1.7463, "step": 868 }, { "epoch": 0.12777063039882375, "grad_norm": 0.5067816972732544, "learning_rate": 0.00014237061769616026, "loss": 1.1012, "step": 869 }, { "epoch": 0.12791766219444956, "grad_norm": 0.4989425539970398, "learning_rate": 0.00014230383973288813, "loss": 1.0762, "step": 870 }, { "epoch": 0.12806469399007536, "grad_norm": 0.35230669379234314, "learning_rate": 0.00014223706176961603, "loss": 1.8486, "step": 871 }, { "epoch": 0.12821172578570117, "grad_norm": 0.6957948207855225, "learning_rate": 0.0001421702838063439, "loss": 1.2183, "step": 872 }, { "epoch": 0.12835875758132695, "grad_norm": 0.5365024209022522, "learning_rate": 0.00014210350584307178, "loss": 1.8339, "step": 873 }, { "epoch": 0.12850578937695276, "grad_norm": 0.503851592540741, "learning_rate": 0.00014203672787979968, "loss": 1.2495, "step": 874 }, { "epoch": 0.12865282117257856, "grad_norm": 0.6007227897644043, "learning_rate": 0.00014196994991652755, "loss": 1.4142, "step": 875 }, { "epoch": 0.12879985296820437, "grad_norm": 0.507002055644989, "learning_rate": 0.00014190317195325545, "loss": 1.3017, "step": 876 }, { "epoch": 0.12894688476383018, "grad_norm": 0.6605522036552429, "learning_rate": 0.00014183639398998333, "loss": 1.1574, "step": 877 }, { "epoch": 0.12909391655945598, "grad_norm": 0.4164111018180847, "learning_rate": 0.0001417696160267112, "loss": 1.5058, "step": 878 }, { "epoch": 0.1292409483550818, "grad_norm": 0.5471015572547913, "learning_rate": 0.00014170283806343907, "loss": 1.3244, "step": 879 }, { "epoch": 0.1293879801507076, "grad_norm": 0.6898621320724487, "learning_rate": 0.00014163606010016695, "loss": 1.1993, "step": 880 }, { "epoch": 0.1295350119463334, "grad_norm": 0.4740403890609741, "learning_rate": 0.00014156928213689482, "loss": 1.1256, "step": 881 }, { "epoch": 0.12968204374195919, "grad_norm": 0.5113210082054138, "learning_rate": 0.00014150250417362272, "loss": 1.2069, "step": 882 }, { "epoch": 0.129829075537585, "grad_norm": 0.4742903709411621, "learning_rate": 0.0001414357262103506, "loss": 1.1397, "step": 883 }, { "epoch": 0.1299761073332108, "grad_norm": 0.5628926157951355, "learning_rate": 0.00014136894824707847, "loss": 0.9799, "step": 884 }, { "epoch": 0.1301231391288366, "grad_norm": 0.5508236885070801, "learning_rate": 0.00014130217028380634, "loss": 0.9254, "step": 885 }, { "epoch": 0.13027017092446241, "grad_norm": 0.6175186038017273, "learning_rate": 0.0001412353923205342, "loss": 1.1867, "step": 886 }, { "epoch": 0.13041720272008822, "grad_norm": 0.5116024613380432, "learning_rate": 0.0001411686143572621, "loss": 1.2721, "step": 887 }, { "epoch": 0.13056423451571403, "grad_norm": 0.3625930845737457, "learning_rate": 0.00014110183639398999, "loss": 1.2892, "step": 888 }, { "epoch": 0.13071126631133984, "grad_norm": 0.5908060073852539, "learning_rate": 0.00014103505843071786, "loss": 1.4457, "step": 889 }, { "epoch": 0.13085829810696564, "grad_norm": 0.5285727977752686, "learning_rate": 0.00014096828046744576, "loss": 0.9473, "step": 890 }, { "epoch": 0.13100532990259142, "grad_norm": 0.6402186155319214, "learning_rate": 0.00014090150250417363, "loss": 1.2179, "step": 891 }, { "epoch": 0.13115236169821723, "grad_norm": 0.8047806620597839, "learning_rate": 0.00014083472454090153, "loss": 1.1751, "step": 892 }, { "epoch": 0.13129939349384304, "grad_norm": 0.6143875122070312, "learning_rate": 0.0001407679465776294, "loss": 1.3013, "step": 893 }, { "epoch": 0.13144642528946884, "grad_norm": 0.7654747366905212, "learning_rate": 0.00014070116861435728, "loss": 1.2366, "step": 894 }, { "epoch": 0.13159345708509465, "grad_norm": 0.6778333783149719, "learning_rate": 0.00014063439065108515, "loss": 1.1046, "step": 895 }, { "epoch": 0.13174048888072046, "grad_norm": 0.5273547172546387, "learning_rate": 0.00014056761268781303, "loss": 1.074, "step": 896 }, { "epoch": 0.13188752067634626, "grad_norm": 0.4679009020328522, "learning_rate": 0.0001405008347245409, "loss": 1.2419, "step": 897 }, { "epoch": 0.13203455247197207, "grad_norm": 0.47258812189102173, "learning_rate": 0.0001404340567612688, "loss": 1.2677, "step": 898 }, { "epoch": 0.13218158426759788, "grad_norm": 0.6930595636367798, "learning_rate": 0.00014036727879799667, "loss": 1.216, "step": 899 }, { "epoch": 0.13232861606322366, "grad_norm": 0.4715471565723419, "learning_rate": 0.00014030050083472454, "loss": 1.2155, "step": 900 }, { "epoch": 0.13247564785884947, "grad_norm": 0.5584536194801331, "learning_rate": 0.00014023372287145242, "loss": 1.2358, "step": 901 }, { "epoch": 0.13262267965447527, "grad_norm": 0.5100342631340027, "learning_rate": 0.0001401669449081803, "loss": 1.3926, "step": 902 }, { "epoch": 0.13276971145010108, "grad_norm": 0.7324742078781128, "learning_rate": 0.0001401001669449082, "loss": 1.3244, "step": 903 }, { "epoch": 0.1329167432457269, "grad_norm": 0.6234824061393738, "learning_rate": 0.00014003338898163606, "loss": 1.1377, "step": 904 }, { "epoch": 0.1330637750413527, "grad_norm": 0.666627824306488, "learning_rate": 0.00013996661101836394, "loss": 1.1467, "step": 905 }, { "epoch": 0.1332108068369785, "grad_norm": 0.5073150992393494, "learning_rate": 0.0001398998330550918, "loss": 0.9931, "step": 906 }, { "epoch": 0.1333578386326043, "grad_norm": 0.5775906443595886, "learning_rate": 0.0001398330550918197, "loss": 1.0256, "step": 907 }, { "epoch": 0.13350487042823012, "grad_norm": 0.5022817254066467, "learning_rate": 0.00013976627712854758, "loss": 1.5966, "step": 908 }, { "epoch": 0.1336519022238559, "grad_norm": 0.5626740455627441, "learning_rate": 0.00013969949916527548, "loss": 1.2568, "step": 909 }, { "epoch": 0.1337989340194817, "grad_norm": 0.41345053911209106, "learning_rate": 0.00013963272120200336, "loss": 0.9232, "step": 910 }, { "epoch": 0.1339459658151075, "grad_norm": 0.5167744159698486, "learning_rate": 0.00013956594323873123, "loss": 1.21, "step": 911 }, { "epoch": 0.13409299761073332, "grad_norm": 0.7588237524032593, "learning_rate": 0.0001394991652754591, "loss": 1.0734, "step": 912 }, { "epoch": 0.13424002940635912, "grad_norm": 0.6192663311958313, "learning_rate": 0.00013943238731218698, "loss": 1.0383, "step": 913 }, { "epoch": 0.13438706120198493, "grad_norm": 0.6501249074935913, "learning_rate": 0.00013936560934891488, "loss": 0.9988, "step": 914 }, { "epoch": 0.13453409299761074, "grad_norm": 0.5095698237419128, "learning_rate": 0.00013929883138564275, "loss": 1.0933, "step": 915 }, { "epoch": 0.13468112479323655, "grad_norm": 0.38994666934013367, "learning_rate": 0.00013923205342237062, "loss": 1.4904, "step": 916 }, { "epoch": 0.13482815658886235, "grad_norm": 0.6880642175674438, "learning_rate": 0.0001391652754590985, "loss": 1.2445, "step": 917 }, { "epoch": 0.13497518838448813, "grad_norm": 0.5167624950408936, "learning_rate": 0.00013909849749582637, "loss": 1.3389, "step": 918 }, { "epoch": 0.13512222018011394, "grad_norm": 0.6010488867759705, "learning_rate": 0.00013903171953255427, "loss": 1.1215, "step": 919 }, { "epoch": 0.13526925197573975, "grad_norm": 0.6071981191635132, "learning_rate": 0.00013896494156928214, "loss": 1.5345, "step": 920 }, { "epoch": 0.13541628377136555, "grad_norm": 0.4612002372741699, "learning_rate": 0.00013889816360601002, "loss": 1.8756, "step": 921 }, { "epoch": 0.13556331556699136, "grad_norm": 0.6840288043022156, "learning_rate": 0.0001388313856427379, "loss": 1.2829, "step": 922 }, { "epoch": 0.13571034736261717, "grad_norm": 0.7206271290779114, "learning_rate": 0.00013876460767946576, "loss": 1.1772, "step": 923 }, { "epoch": 0.13585737915824297, "grad_norm": 0.5482673048973083, "learning_rate": 0.00013869782971619366, "loss": 1.3724, "step": 924 }, { "epoch": 0.13600441095386878, "grad_norm": 0.5575976967811584, "learning_rate": 0.00013863105175292154, "loss": 1.1324, "step": 925 }, { "epoch": 0.1361514427494946, "grad_norm": 0.6315499544143677, "learning_rate": 0.00013856427378964944, "loss": 1.0173, "step": 926 }, { "epoch": 0.1362984745451204, "grad_norm": 0.47668710350990295, "learning_rate": 0.0001384974958263773, "loss": 1.0918, "step": 927 }, { "epoch": 0.13644550634074618, "grad_norm": 0.3959556818008423, "learning_rate": 0.00013843071786310518, "loss": 1.5871, "step": 928 }, { "epoch": 0.13659253813637198, "grad_norm": 0.481631875038147, "learning_rate": 0.00013836393989983308, "loss": 1.2306, "step": 929 }, { "epoch": 0.1367395699319978, "grad_norm": 0.5838369727134705, "learning_rate": 0.00013829716193656096, "loss": 1.1516, "step": 930 }, { "epoch": 0.1368866017276236, "grad_norm": 0.7173163294792175, "learning_rate": 0.00013823038397328883, "loss": 1.307, "step": 931 }, { "epoch": 0.1370336335232494, "grad_norm": 0.6298332810401917, "learning_rate": 0.0001381636060100167, "loss": 1.117, "step": 932 }, { "epoch": 0.1371806653188752, "grad_norm": 0.4059942960739136, "learning_rate": 0.00013809682804674458, "loss": 1.4407, "step": 933 }, { "epoch": 0.13732769711450102, "grad_norm": 0.6634607911109924, "learning_rate": 0.00013803005008347245, "loss": 1.3121, "step": 934 }, { "epoch": 0.13747472891012683, "grad_norm": 0.46383485198020935, "learning_rate": 0.00013796327212020035, "loss": 1.2783, "step": 935 }, { "epoch": 0.13762176070575263, "grad_norm": 0.6389989256858826, "learning_rate": 0.00013789649415692822, "loss": 1.3141, "step": 936 }, { "epoch": 0.1377687925013784, "grad_norm": 0.4315093457698822, "learning_rate": 0.0001378297161936561, "loss": 1.6222, "step": 937 }, { "epoch": 0.13791582429700422, "grad_norm": 0.7251682877540588, "learning_rate": 0.00013776293823038397, "loss": 1.4325, "step": 938 }, { "epoch": 0.13806285609263003, "grad_norm": 0.6229355931282043, "learning_rate": 0.00013769616026711184, "loss": 1.2004, "step": 939 }, { "epoch": 0.13820988788825583, "grad_norm": 0.4562682807445526, "learning_rate": 0.00013762938230383974, "loss": 1.0189, "step": 940 }, { "epoch": 0.13835691968388164, "grad_norm": 0.5207197666168213, "learning_rate": 0.00013756260434056762, "loss": 1.2944, "step": 941 }, { "epoch": 0.13850395147950745, "grad_norm": 0.5064826011657715, "learning_rate": 0.0001374958263772955, "loss": 1.4037, "step": 942 }, { "epoch": 0.13865098327513325, "grad_norm": 0.819783091545105, "learning_rate": 0.0001374290484140234, "loss": 1.3005, "step": 943 }, { "epoch": 0.13879801507075906, "grad_norm": 0.5328880548477173, "learning_rate": 0.00013736227045075126, "loss": 1.3295, "step": 944 }, { "epoch": 0.13894504686638487, "grad_norm": 0.49416014552116394, "learning_rate": 0.00013729549248747916, "loss": 1.0662, "step": 945 }, { "epoch": 0.13909207866201065, "grad_norm": 0.5055965185165405, "learning_rate": 0.00013722871452420704, "loss": 1.6386, "step": 946 }, { "epoch": 0.13923911045763646, "grad_norm": 0.5027865767478943, "learning_rate": 0.0001371619365609349, "loss": 1.0696, "step": 947 }, { "epoch": 0.13938614225326226, "grad_norm": 0.6743228435516357, "learning_rate": 0.00013709515859766278, "loss": 1.0138, "step": 948 }, { "epoch": 0.13953317404888807, "grad_norm": 0.47505462169647217, "learning_rate": 0.00013702838063439065, "loss": 1.2772, "step": 949 }, { "epoch": 0.13968020584451388, "grad_norm": 0.4047502279281616, "learning_rate": 0.00013696160267111853, "loss": 1.5884, "step": 950 }, { "epoch": 0.13982723764013968, "grad_norm": 0.646245002746582, "learning_rate": 0.00013689482470784643, "loss": 0.8867, "step": 951 }, { "epoch": 0.1399742694357655, "grad_norm": 0.49004390835762024, "learning_rate": 0.0001368280467445743, "loss": 1.4435, "step": 952 }, { "epoch": 0.1401213012313913, "grad_norm": 0.6042948365211487, "learning_rate": 0.00013676126878130217, "loss": 1.0944, "step": 953 }, { "epoch": 0.1402683330270171, "grad_norm": 0.5220313668251038, "learning_rate": 0.00013669449081803005, "loss": 1.4696, "step": 954 }, { "epoch": 0.14041536482264289, "grad_norm": 0.4691425859928131, "learning_rate": 0.00013662771285475792, "loss": 1.32, "step": 955 }, { "epoch": 0.1405623966182687, "grad_norm": 0.455734521150589, "learning_rate": 0.00013656093489148582, "loss": 1.4728, "step": 956 }, { "epoch": 0.1407094284138945, "grad_norm": 0.4720694422721863, "learning_rate": 0.0001364941569282137, "loss": 1.642, "step": 957 }, { "epoch": 0.1408564602095203, "grad_norm": 0.620746910572052, "learning_rate": 0.00013642737896494157, "loss": 1.0819, "step": 958 }, { "epoch": 0.1410034920051461, "grad_norm": 0.5684634447097778, "learning_rate": 0.00013636060100166944, "loss": 1.2001, "step": 959 }, { "epoch": 0.14115052380077192, "grad_norm": 0.47597062587738037, "learning_rate": 0.00013629382303839734, "loss": 1.1085, "step": 960 }, { "epoch": 0.14129755559639773, "grad_norm": 0.626440703868866, "learning_rate": 0.00013622704507512521, "loss": 1.1774, "step": 961 }, { "epoch": 0.14144458739202354, "grad_norm": 0.4925549626350403, "learning_rate": 0.00013616026711185311, "loss": 1.0271, "step": 962 }, { "epoch": 0.14159161918764934, "grad_norm": 0.5324035882949829, "learning_rate": 0.000136093489148581, "loss": 1.1025, "step": 963 }, { "epoch": 0.14173865098327512, "grad_norm": 0.5632936358451843, "learning_rate": 0.00013602671118530886, "loss": 1.5265, "step": 964 }, { "epoch": 0.14188568277890093, "grad_norm": 0.5520371198654175, "learning_rate": 0.00013595993322203673, "loss": 1.2105, "step": 965 }, { "epoch": 0.14203271457452674, "grad_norm": 0.6133058667182922, "learning_rate": 0.0001358931552587646, "loss": 1.2932, "step": 966 }, { "epoch": 0.14217974637015254, "grad_norm": 0.5591563582420349, "learning_rate": 0.0001358263772954925, "loss": 1.3038, "step": 967 }, { "epoch": 0.14232677816577835, "grad_norm": 0.5529747009277344, "learning_rate": 0.00013575959933222038, "loss": 1.454, "step": 968 }, { "epoch": 0.14247380996140416, "grad_norm": 0.5065269470214844, "learning_rate": 0.00013569282136894825, "loss": 1.422, "step": 969 }, { "epoch": 0.14262084175702996, "grad_norm": 0.5579943060874939, "learning_rate": 0.00013562604340567613, "loss": 1.3398, "step": 970 }, { "epoch": 0.14276787355265577, "grad_norm": 0.5450153946876526, "learning_rate": 0.000135559265442404, "loss": 1.1754, "step": 971 }, { "epoch": 0.14291490534828158, "grad_norm": 0.49201053380966187, "learning_rate": 0.0001354924874791319, "loss": 0.9478, "step": 972 }, { "epoch": 0.14306193714390736, "grad_norm": 0.661428689956665, "learning_rate": 0.00013542570951585977, "loss": 1.2155, "step": 973 }, { "epoch": 0.14320896893953317, "grad_norm": 0.5616013407707214, "learning_rate": 0.00013535893155258765, "loss": 1.4573, "step": 974 }, { "epoch": 0.14335600073515897, "grad_norm": 0.6430721282958984, "learning_rate": 0.00013529215358931552, "loss": 1.2661, "step": 975 }, { "epoch": 0.14350303253078478, "grad_norm": 0.6334646940231323, "learning_rate": 0.0001352253756260434, "loss": 1.1794, "step": 976 }, { "epoch": 0.1436500643264106, "grad_norm": 0.4562377333641052, "learning_rate": 0.0001351585976627713, "loss": 1.3215, "step": 977 }, { "epoch": 0.1437970961220364, "grad_norm": 0.7308101058006287, "learning_rate": 0.00013509181969949917, "loss": 1.1451, "step": 978 }, { "epoch": 0.1439441279176622, "grad_norm": 0.53978031873703, "learning_rate": 0.00013502504173622707, "loss": 1.2886, "step": 979 }, { "epoch": 0.144091159713288, "grad_norm": 0.545616865158081, "learning_rate": 0.00013495826377295494, "loss": 1.2208, "step": 980 }, { "epoch": 0.14423819150891382, "grad_norm": 0.4624879062175751, "learning_rate": 0.0001348914858096828, "loss": 1.7155, "step": 981 }, { "epoch": 0.1443852233045396, "grad_norm": 0.5349489450454712, "learning_rate": 0.00013482470784641069, "loss": 1.26, "step": 982 }, { "epoch": 0.1445322551001654, "grad_norm": 0.5202991366386414, "learning_rate": 0.00013475792988313859, "loss": 1.1822, "step": 983 }, { "epoch": 0.1446792868957912, "grad_norm": 0.510486364364624, "learning_rate": 0.00013469115191986646, "loss": 1.1683, "step": 984 }, { "epoch": 0.14482631869141702, "grad_norm": 0.4625108540058136, "learning_rate": 0.00013462437395659433, "loss": 1.214, "step": 985 }, { "epoch": 0.14497335048704282, "grad_norm": 0.5003244280815125, "learning_rate": 0.0001345575959933222, "loss": 1.452, "step": 986 }, { "epoch": 0.14512038228266863, "grad_norm": 0.3843102753162384, "learning_rate": 0.00013449081803005008, "loss": 1.4191, "step": 987 }, { "epoch": 0.14526741407829444, "grad_norm": 0.5317103266716003, "learning_rate": 0.00013442404006677798, "loss": 1.2452, "step": 988 }, { "epoch": 0.14541444587392025, "grad_norm": 0.5441875457763672, "learning_rate": 0.00013435726210350585, "loss": 1.2087, "step": 989 }, { "epoch": 0.14556147766954605, "grad_norm": 0.39978983998298645, "learning_rate": 0.00013429048414023373, "loss": 1.6565, "step": 990 }, { "epoch": 0.14570850946517183, "grad_norm": 0.5103518962860107, "learning_rate": 0.0001342237061769616, "loss": 1.1014, "step": 991 }, { "epoch": 0.14585554126079764, "grad_norm": 0.5771106481552124, "learning_rate": 0.00013415692821368947, "loss": 1.2467, "step": 992 }, { "epoch": 0.14600257305642345, "grad_norm": 0.5237480998039246, "learning_rate": 0.00013409015025041737, "loss": 0.939, "step": 993 }, { "epoch": 0.14614960485204925, "grad_norm": 0.6796878576278687, "learning_rate": 0.00013402337228714524, "loss": 1.2905, "step": 994 }, { "epoch": 0.14629663664767506, "grad_norm": 0.498824805021286, "learning_rate": 0.00013395659432387312, "loss": 1.051, "step": 995 }, { "epoch": 0.14644366844330087, "grad_norm": 0.44443193078041077, "learning_rate": 0.00013388981636060102, "loss": 1.2344, "step": 996 }, { "epoch": 0.14659070023892667, "grad_norm": 0.5269819498062134, "learning_rate": 0.0001338230383973289, "loss": 1.3881, "step": 997 }, { "epoch": 0.14673773203455248, "grad_norm": 0.6225183010101318, "learning_rate": 0.00013375626043405676, "loss": 1.1648, "step": 998 }, { "epoch": 0.1468847638301783, "grad_norm": 0.4918833076953888, "learning_rate": 0.00013368948247078466, "loss": 1.2032, "step": 999 }, { "epoch": 0.14703179562580407, "grad_norm": 0.3845730423927307, "learning_rate": 0.00013362270450751254, "loss": 1.3826, "step": 1000 }, { "epoch": 0.14717882742142988, "grad_norm": 0.5400959849357605, "learning_rate": 0.0001335559265442404, "loss": 1.2959, "step": 1001 }, { "epoch": 0.14732585921705568, "grad_norm": 0.4961782693862915, "learning_rate": 0.00013348914858096828, "loss": 0.914, "step": 1002 }, { "epoch": 0.1474728910126815, "grad_norm": 0.5829025506973267, "learning_rate": 0.00013342237061769616, "loss": 1.0295, "step": 1003 }, { "epoch": 0.1476199228083073, "grad_norm": 0.553892195224762, "learning_rate": 0.00013335559265442406, "loss": 1.4651, "step": 1004 }, { "epoch": 0.1477669546039331, "grad_norm": 0.6935555934906006, "learning_rate": 0.00013328881469115193, "loss": 1.2555, "step": 1005 }, { "epoch": 0.1479139863995589, "grad_norm": 0.5046958327293396, "learning_rate": 0.0001332220367278798, "loss": 1.189, "step": 1006 }, { "epoch": 0.14806101819518472, "grad_norm": 0.6350305676460266, "learning_rate": 0.00013315525876460768, "loss": 1.3897, "step": 1007 }, { "epoch": 0.14820804999081053, "grad_norm": 0.48995882272720337, "learning_rate": 0.00013308848080133555, "loss": 1.3988, "step": 1008 }, { "epoch": 0.1483550817864363, "grad_norm": 0.568442702293396, "learning_rate": 0.00013302170283806345, "loss": 1.1006, "step": 1009 }, { "epoch": 0.1485021135820621, "grad_norm": 0.5701233744621277, "learning_rate": 0.00013295492487479132, "loss": 1.0193, "step": 1010 }, { "epoch": 0.14864914537768792, "grad_norm": 0.6046086549758911, "learning_rate": 0.0001328881469115192, "loss": 1.339, "step": 1011 }, { "epoch": 0.14879617717331373, "grad_norm": 0.656521737575531, "learning_rate": 0.00013282136894824707, "loss": 1.0364, "step": 1012 }, { "epoch": 0.14894320896893953, "grad_norm": 0.4741376042366028, "learning_rate": 0.00013275459098497497, "loss": 1.2844, "step": 1013 }, { "epoch": 0.14909024076456534, "grad_norm": 0.7059875130653381, "learning_rate": 0.00013268781302170284, "loss": 1.0558, "step": 1014 }, { "epoch": 0.14923727256019115, "grad_norm": 0.5298685431480408, "learning_rate": 0.00013262103505843074, "loss": 1.2938, "step": 1015 }, { "epoch": 0.14938430435581695, "grad_norm": 0.5314155220985413, "learning_rate": 0.00013255425709515862, "loss": 1.37, "step": 1016 }, { "epoch": 0.14953133615144276, "grad_norm": 0.500091552734375, "learning_rate": 0.0001324874791318865, "loss": 1.2089, "step": 1017 }, { "epoch": 0.14967836794706854, "grad_norm": 0.6238353848457336, "learning_rate": 0.00013242070116861436, "loss": 1.1169, "step": 1018 }, { "epoch": 0.14982539974269435, "grad_norm": 0.6503071784973145, "learning_rate": 0.00013235392320534224, "loss": 1.4372, "step": 1019 }, { "epoch": 0.14997243153832016, "grad_norm": 0.5923087000846863, "learning_rate": 0.00013228714524207014, "loss": 1.0303, "step": 1020 }, { "epoch": 0.15011946333394596, "grad_norm": 0.529041051864624, "learning_rate": 0.000132220367278798, "loss": 1.7465, "step": 1021 }, { "epoch": 0.15026649512957177, "grad_norm": 0.4500576853752136, "learning_rate": 0.00013215358931552588, "loss": 1.5359, "step": 1022 }, { "epoch": 0.15041352692519758, "grad_norm": 0.5380405187606812, "learning_rate": 0.00013208681135225376, "loss": 1.3511, "step": 1023 }, { "epoch": 0.15056055872082338, "grad_norm": 0.5435751676559448, "learning_rate": 0.00013202003338898163, "loss": 1.6295, "step": 1024 }, { "epoch": 0.1507075905164492, "grad_norm": 0.7070488929748535, "learning_rate": 0.00013195325542570953, "loss": 1.0193, "step": 1025 }, { "epoch": 0.150854622312075, "grad_norm": 0.4302719533443451, "learning_rate": 0.0001318864774624374, "loss": 1.0477, "step": 1026 }, { "epoch": 0.15100165410770078, "grad_norm": 0.5257076621055603, "learning_rate": 0.00013181969949916528, "loss": 1.0912, "step": 1027 }, { "epoch": 0.15114868590332659, "grad_norm": 0.46294650435447693, "learning_rate": 0.00013175292153589315, "loss": 1.1265, "step": 1028 }, { "epoch": 0.1512957176989524, "grad_norm": 0.43706071376800537, "learning_rate": 0.00013168614357262102, "loss": 1.9003, "step": 1029 }, { "epoch": 0.1514427494945782, "grad_norm": 0.5900750160217285, "learning_rate": 0.00013161936560934892, "loss": 1.5098, "step": 1030 }, { "epoch": 0.151589781290204, "grad_norm": 0.5476238131523132, "learning_rate": 0.0001315525876460768, "loss": 1.6786, "step": 1031 }, { "epoch": 0.1517368130858298, "grad_norm": 0.6057994961738586, "learning_rate": 0.0001314858096828047, "loss": 1.4379, "step": 1032 }, { "epoch": 0.15188384488145562, "grad_norm": 0.507075846195221, "learning_rate": 0.00013141903171953257, "loss": 1.1808, "step": 1033 }, { "epoch": 0.15203087667708143, "grad_norm": 0.5763989686965942, "learning_rate": 0.00013135225375626044, "loss": 1.059, "step": 1034 }, { "epoch": 0.15217790847270724, "grad_norm": 0.5421633124351501, "learning_rate": 0.00013128547579298832, "loss": 1.1944, "step": 1035 }, { "epoch": 0.15232494026833301, "grad_norm": 0.811051070690155, "learning_rate": 0.00013121869782971622, "loss": 0.983, "step": 1036 }, { "epoch": 0.15247197206395882, "grad_norm": 0.43127065896987915, "learning_rate": 0.0001311519198664441, "loss": 1.5171, "step": 1037 }, { "epoch": 0.15261900385958463, "grad_norm": 0.4577949047088623, "learning_rate": 0.00013108514190317196, "loss": 1.0712, "step": 1038 }, { "epoch": 0.15276603565521044, "grad_norm": 0.39859458804130554, "learning_rate": 0.00013101836393989983, "loss": 1.0956, "step": 1039 }, { "epoch": 0.15291306745083624, "grad_norm": 0.5528180003166199, "learning_rate": 0.0001309515859766277, "loss": 1.0687, "step": 1040 }, { "epoch": 0.15306009924646205, "grad_norm": 0.5343106389045715, "learning_rate": 0.0001308848080133556, "loss": 1.0488, "step": 1041 }, { "epoch": 0.15320713104208786, "grad_norm": 0.6988528966903687, "learning_rate": 0.00013081803005008348, "loss": 1.0205, "step": 1042 }, { "epoch": 0.15335416283771366, "grad_norm": 0.46427690982818604, "learning_rate": 0.00013075125208681135, "loss": 1.0811, "step": 1043 }, { "epoch": 0.15350119463333947, "grad_norm": 0.46749570965766907, "learning_rate": 0.00013068447412353923, "loss": 1.0124, "step": 1044 }, { "epoch": 0.15364822642896525, "grad_norm": 0.5215660929679871, "learning_rate": 0.0001306176961602671, "loss": 1.1504, "step": 1045 }, { "epoch": 0.15379525822459106, "grad_norm": 0.4712725281715393, "learning_rate": 0.00013055091819699497, "loss": 1.4088, "step": 1046 }, { "epoch": 0.15394229002021687, "grad_norm": 0.6024323105812073, "learning_rate": 0.00013048414023372287, "loss": 1.1859, "step": 1047 }, { "epoch": 0.15408932181584267, "grad_norm": 0.6194908022880554, "learning_rate": 0.00013041736227045075, "loss": 1.2924, "step": 1048 }, { "epoch": 0.15423635361146848, "grad_norm": 0.4938560724258423, "learning_rate": 0.00013035058430717865, "loss": 1.6458, "step": 1049 }, { "epoch": 0.1543833854070943, "grad_norm": 0.4069878160953522, "learning_rate": 0.00013028380634390652, "loss": 1.6524, "step": 1050 }, { "epoch": 0.1545304172027201, "grad_norm": 0.4865352213382721, "learning_rate": 0.0001302170283806344, "loss": 1.2902, "step": 1051 }, { "epoch": 0.1546774489983459, "grad_norm": 0.6292316913604736, "learning_rate": 0.0001301502504173623, "loss": 1.147, "step": 1052 }, { "epoch": 0.1548244807939717, "grad_norm": 0.702254056930542, "learning_rate": 0.00013008347245409017, "loss": 1.1063, "step": 1053 }, { "epoch": 0.1549715125895975, "grad_norm": 0.5226403474807739, "learning_rate": 0.00013001669449081804, "loss": 1.628, "step": 1054 }, { "epoch": 0.1551185443852233, "grad_norm": 0.5333846211433411, "learning_rate": 0.00012994991652754591, "loss": 1.1857, "step": 1055 }, { "epoch": 0.1552655761808491, "grad_norm": 0.550828218460083, "learning_rate": 0.0001298831385642738, "loss": 1.624, "step": 1056 }, { "epoch": 0.1554126079764749, "grad_norm": 0.4944291114807129, "learning_rate": 0.0001298163606010017, "loss": 1.0578, "step": 1057 }, { "epoch": 0.15555963977210072, "grad_norm": 0.5132856369018555, "learning_rate": 0.00012974958263772956, "loss": 1.0857, "step": 1058 }, { "epoch": 0.15570667156772652, "grad_norm": 0.4348208010196686, "learning_rate": 0.00012968280467445743, "loss": 1.3012, "step": 1059 }, { "epoch": 0.15585370336335233, "grad_norm": 0.6335738301277161, "learning_rate": 0.0001296160267111853, "loss": 1.315, "step": 1060 }, { "epoch": 0.15600073515897814, "grad_norm": 0.6427448391914368, "learning_rate": 0.00012954924874791318, "loss": 1.1712, "step": 1061 }, { "epoch": 0.15614776695460394, "grad_norm": 0.5747259259223938, "learning_rate": 0.00012948247078464108, "loss": 1.1646, "step": 1062 }, { "epoch": 0.15629479875022972, "grad_norm": 0.3738940954208374, "learning_rate": 0.00012941569282136895, "loss": 1.2773, "step": 1063 }, { "epoch": 0.15644183054585553, "grad_norm": 0.6192455291748047, "learning_rate": 0.00012934891485809683, "loss": 1.0472, "step": 1064 }, { "epoch": 0.15658886234148134, "grad_norm": 0.38795310258865356, "learning_rate": 0.0001292821368948247, "loss": 1.5403, "step": 1065 }, { "epoch": 0.15673589413710715, "grad_norm": 0.5876004695892334, "learning_rate": 0.0001292153589315526, "loss": 0.9717, "step": 1066 }, { "epoch": 0.15688292593273295, "grad_norm": 0.5744571089744568, "learning_rate": 0.00012914858096828047, "loss": 1.2863, "step": 1067 }, { "epoch": 0.15702995772835876, "grad_norm": 0.47682496905326843, "learning_rate": 0.00012908180300500837, "loss": 1.3852, "step": 1068 }, { "epoch": 0.15717698952398457, "grad_norm": 0.45354029536247253, "learning_rate": 0.00012901502504173625, "loss": 1.3806, "step": 1069 }, { "epoch": 0.15732402131961037, "grad_norm": 0.6915189027786255, "learning_rate": 0.00012894824707846412, "loss": 1.0346, "step": 1070 }, { "epoch": 0.15747105311523618, "grad_norm": 0.6614881157875061, "learning_rate": 0.000128881469115192, "loss": 1.5615, "step": 1071 }, { "epoch": 0.15761808491086196, "grad_norm": 0.626767098903656, "learning_rate": 0.00012881469115191987, "loss": 0.9743, "step": 1072 }, { "epoch": 0.15776511670648777, "grad_norm": 0.6090566515922546, "learning_rate": 0.00012874791318864777, "loss": 1.4947, "step": 1073 }, { "epoch": 0.15791214850211358, "grad_norm": 0.677635669708252, "learning_rate": 0.00012868113522537564, "loss": 1.452, "step": 1074 }, { "epoch": 0.15805918029773938, "grad_norm": 0.6001076698303223, "learning_rate": 0.0001286143572621035, "loss": 0.965, "step": 1075 }, { "epoch": 0.1582062120933652, "grad_norm": 0.596094012260437, "learning_rate": 0.00012854757929883139, "loss": 1.106, "step": 1076 }, { "epoch": 0.158353243888991, "grad_norm": 0.4844059944152832, "learning_rate": 0.00012848080133555926, "loss": 1.607, "step": 1077 }, { "epoch": 0.1585002756846168, "grad_norm": 0.4016835391521454, "learning_rate": 0.00012841402337228716, "loss": 1.5224, "step": 1078 }, { "epoch": 0.1586473074802426, "grad_norm": 0.6012298464775085, "learning_rate": 0.00012834724540901503, "loss": 1.3375, "step": 1079 }, { "epoch": 0.15879433927586842, "grad_norm": 0.7424705624580383, "learning_rate": 0.0001282804674457429, "loss": 1.2053, "step": 1080 }, { "epoch": 0.1589413710714942, "grad_norm": 0.4664783775806427, "learning_rate": 0.00012821368948247078, "loss": 1.1711, "step": 1081 }, { "epoch": 0.15908840286712, "grad_norm": 0.6334554553031921, "learning_rate": 0.00012814691151919865, "loss": 1.1871, "step": 1082 }, { "epoch": 0.1592354346627458, "grad_norm": 0.3945555090904236, "learning_rate": 0.00012808013355592655, "loss": 1.5365, "step": 1083 }, { "epoch": 0.15938246645837162, "grad_norm": 0.570341944694519, "learning_rate": 0.00012801335559265442, "loss": 1.0281, "step": 1084 }, { "epoch": 0.15952949825399743, "grad_norm": 0.3910903036594391, "learning_rate": 0.00012794657762938233, "loss": 1.1659, "step": 1085 }, { "epoch": 0.15967653004962323, "grad_norm": 0.6122812628746033, "learning_rate": 0.0001278797996661102, "loss": 1.2891, "step": 1086 }, { "epoch": 0.15982356184524904, "grad_norm": 0.48156556487083435, "learning_rate": 0.00012781302170283807, "loss": 1.2233, "step": 1087 }, { "epoch": 0.15997059364087485, "grad_norm": 0.7112361192703247, "learning_rate": 0.00012774624373956594, "loss": 1.3354, "step": 1088 }, { "epoch": 0.16011762543650065, "grad_norm": 0.6363181471824646, "learning_rate": 0.00012767946577629384, "loss": 1.03, "step": 1089 }, { "epoch": 0.16026465723212643, "grad_norm": 0.6445418000221252, "learning_rate": 0.00012761268781302172, "loss": 1.0183, "step": 1090 }, { "epoch": 0.16041168902775224, "grad_norm": 0.3888480067253113, "learning_rate": 0.0001275459098497496, "loss": 1.3837, "step": 1091 }, { "epoch": 0.16055872082337805, "grad_norm": 0.45673099160194397, "learning_rate": 0.00012747913188647746, "loss": 1.0252, "step": 1092 }, { "epoch": 0.16070575261900386, "grad_norm": 0.5182449221611023, "learning_rate": 0.00012741235392320534, "loss": 1.4271, "step": 1093 }, { "epoch": 0.16085278441462966, "grad_norm": 0.5524419546127319, "learning_rate": 0.00012734557595993324, "loss": 1.1762, "step": 1094 }, { "epoch": 0.16099981621025547, "grad_norm": 0.8431574106216431, "learning_rate": 0.0001272787979966611, "loss": 1.3216, "step": 1095 }, { "epoch": 0.16114684800588128, "grad_norm": 0.6031796336174011, "learning_rate": 0.00012721202003338898, "loss": 1.1226, "step": 1096 }, { "epoch": 0.16129387980150708, "grad_norm": 0.5023674368858337, "learning_rate": 0.00012714524207011686, "loss": 1.3109, "step": 1097 }, { "epoch": 0.1614409115971329, "grad_norm": 0.5386978387832642, "learning_rate": 0.00012707846410684473, "loss": 0.8037, "step": 1098 }, { "epoch": 0.16158794339275867, "grad_norm": 0.4345020353794098, "learning_rate": 0.0001270116861435726, "loss": 1.791, "step": 1099 }, { "epoch": 0.16173497518838448, "grad_norm": 0.45875146985054016, "learning_rate": 0.0001269449081803005, "loss": 1.2516, "step": 1100 }, { "epoch": 0.16188200698401028, "grad_norm": 0.663425624370575, "learning_rate": 0.00012687813021702838, "loss": 1.0834, "step": 1101 }, { "epoch": 0.1620290387796361, "grad_norm": 0.6682278513908386, "learning_rate": 0.00012681135225375628, "loss": 1.027, "step": 1102 }, { "epoch": 0.1621760705752619, "grad_norm": 0.48990926146507263, "learning_rate": 0.00012674457429048415, "loss": 1.2536, "step": 1103 }, { "epoch": 0.1623231023708877, "grad_norm": 0.5398458242416382, "learning_rate": 0.00012667779632721202, "loss": 1.2719, "step": 1104 }, { "epoch": 0.1624701341665135, "grad_norm": 0.4746045768260956, "learning_rate": 0.00012661101836393992, "loss": 1.953, "step": 1105 }, { "epoch": 0.16261716596213932, "grad_norm": 0.687804639339447, "learning_rate": 0.0001265442404006678, "loss": 1.1235, "step": 1106 }, { "epoch": 0.16276419775776513, "grad_norm": 0.561565101146698, "learning_rate": 0.00012647746243739567, "loss": 1.5263, "step": 1107 }, { "epoch": 0.1629112295533909, "grad_norm": 0.6584048867225647, "learning_rate": 0.00012641068447412354, "loss": 1.2639, "step": 1108 }, { "epoch": 0.16305826134901671, "grad_norm": 0.6167396306991577, "learning_rate": 0.00012634390651085142, "loss": 1.2732, "step": 1109 }, { "epoch": 0.16320529314464252, "grad_norm": 0.6604903340339661, "learning_rate": 0.00012627712854757932, "loss": 0.9588, "step": 1110 }, { "epoch": 0.16335232494026833, "grad_norm": 0.7306828498840332, "learning_rate": 0.0001262103505843072, "loss": 1.1834, "step": 1111 }, { "epoch": 0.16349935673589414, "grad_norm": 0.8269386291503906, "learning_rate": 0.00012614357262103506, "loss": 0.9306, "step": 1112 }, { "epoch": 0.16364638853151994, "grad_norm": 0.6063253879547119, "learning_rate": 0.00012607679465776294, "loss": 1.6218, "step": 1113 }, { "epoch": 0.16379342032714575, "grad_norm": 0.5284925103187561, "learning_rate": 0.0001260100166944908, "loss": 0.9619, "step": 1114 }, { "epoch": 0.16394045212277156, "grad_norm": 0.5151240825653076, "learning_rate": 0.00012594323873121868, "loss": 1.2472, "step": 1115 }, { "epoch": 0.16408748391839736, "grad_norm": 0.478516161441803, "learning_rate": 0.00012587646076794658, "loss": 1.3951, "step": 1116 }, { "epoch": 0.16423451571402314, "grad_norm": 0.4375409483909607, "learning_rate": 0.00012580968280467446, "loss": 1.2698, "step": 1117 }, { "epoch": 0.16438154750964895, "grad_norm": 0.648220956325531, "learning_rate": 0.00012574290484140233, "loss": 1.2473, "step": 1118 }, { "epoch": 0.16452857930527476, "grad_norm": 0.6047022938728333, "learning_rate": 0.00012567612687813023, "loss": 1.517, "step": 1119 }, { "epoch": 0.16467561110090057, "grad_norm": 0.5391956567764282, "learning_rate": 0.0001256093489148581, "loss": 1.0774, "step": 1120 }, { "epoch": 0.16482264289652637, "grad_norm": 0.48347699642181396, "learning_rate": 0.000125542570951586, "loss": 1.4573, "step": 1121 }, { "epoch": 0.16496967469215218, "grad_norm": 0.47838646173477173, "learning_rate": 0.00012547579298831388, "loss": 1.0984, "step": 1122 }, { "epoch": 0.165116706487778, "grad_norm": 0.4879448711872101, "learning_rate": 0.00012540901502504175, "loss": 1.2047, "step": 1123 }, { "epoch": 0.1652637382834038, "grad_norm": 0.6342889666557312, "learning_rate": 0.00012534223706176962, "loss": 0.9869, "step": 1124 }, { "epoch": 0.1654107700790296, "grad_norm": 0.49847492575645447, "learning_rate": 0.0001252754590984975, "loss": 1.3615, "step": 1125 }, { "epoch": 0.1655578018746554, "grad_norm": 0.4972043037414551, "learning_rate": 0.0001252086811352254, "loss": 1.1513, "step": 1126 }, { "epoch": 0.1657048336702812, "grad_norm": 0.5115188360214233, "learning_rate": 0.00012514190317195327, "loss": 1.071, "step": 1127 }, { "epoch": 0.165851865465907, "grad_norm": 0.5591790080070496, "learning_rate": 0.00012507512520868114, "loss": 1.1227, "step": 1128 }, { "epoch": 0.1659988972615328, "grad_norm": 0.5644456148147583, "learning_rate": 0.00012500834724540902, "loss": 1.227, "step": 1129 }, { "epoch": 0.1661459290571586, "grad_norm": 0.5823935270309448, "learning_rate": 0.0001249415692821369, "loss": 1.0709, "step": 1130 }, { "epoch": 0.16629296085278442, "grad_norm": 0.6779971718788147, "learning_rate": 0.00012487479131886476, "loss": 1.4032, "step": 1131 }, { "epoch": 0.16643999264841022, "grad_norm": 0.5484857559204102, "learning_rate": 0.00012480801335559266, "loss": 1.0449, "step": 1132 }, { "epoch": 0.16658702444403603, "grad_norm": 0.7668995261192322, "learning_rate": 0.00012474123539232053, "loss": 1.344, "step": 1133 }, { "epoch": 0.16673405623966184, "grad_norm": 0.5466856360435486, "learning_rate": 0.0001246744574290484, "loss": 1.2978, "step": 1134 }, { "epoch": 0.16688108803528764, "grad_norm": 0.5657954216003418, "learning_rate": 0.0001246076794657763, "loss": 1.2347, "step": 1135 }, { "epoch": 0.16702811983091342, "grad_norm": 0.755219578742981, "learning_rate": 0.00012454090150250418, "loss": 1.1704, "step": 1136 }, { "epoch": 0.16717515162653923, "grad_norm": 0.4288432002067566, "learning_rate": 0.00012447412353923208, "loss": 1.6042, "step": 1137 }, { "epoch": 0.16732218342216504, "grad_norm": 0.6164667010307312, "learning_rate": 0.00012440734557595995, "loss": 1.4533, "step": 1138 }, { "epoch": 0.16746921521779085, "grad_norm": 0.5377941131591797, "learning_rate": 0.00012434056761268783, "loss": 1.0812, "step": 1139 }, { "epoch": 0.16761624701341665, "grad_norm": 0.47269126772880554, "learning_rate": 0.0001242737896494157, "loss": 1.4863, "step": 1140 }, { "epoch": 0.16776327880904246, "grad_norm": 0.4447174370288849, "learning_rate": 0.00012420701168614357, "loss": 1.3104, "step": 1141 }, { "epoch": 0.16791031060466827, "grad_norm": 0.5411022901535034, "learning_rate": 0.00012414023372287147, "loss": 1.3127, "step": 1142 }, { "epoch": 0.16805734240029407, "grad_norm": 0.4711875915527344, "learning_rate": 0.00012407345575959935, "loss": 1.2103, "step": 1143 }, { "epoch": 0.16820437419591988, "grad_norm": 0.7156489491462708, "learning_rate": 0.00012400667779632722, "loss": 1.0002, "step": 1144 }, { "epoch": 0.16835140599154566, "grad_norm": 0.46210795640945435, "learning_rate": 0.0001239398998330551, "loss": 1.8049, "step": 1145 }, { "epoch": 0.16849843778717147, "grad_norm": 0.47046732902526855, "learning_rate": 0.00012387312186978297, "loss": 1.3698, "step": 1146 }, { "epoch": 0.16864546958279727, "grad_norm": 0.6308910846710205, "learning_rate": 0.00012380634390651084, "loss": 1.0929, "step": 1147 }, { "epoch": 0.16879250137842308, "grad_norm": 0.5319485664367676, "learning_rate": 0.00012373956594323874, "loss": 1.6448, "step": 1148 }, { "epoch": 0.1689395331740489, "grad_norm": 0.4913954734802246, "learning_rate": 0.00012367278797996661, "loss": 1.0041, "step": 1149 }, { "epoch": 0.1690865649696747, "grad_norm": 0.5061025619506836, "learning_rate": 0.0001236060100166945, "loss": 1.6191, "step": 1150 }, { "epoch": 0.1692335967653005, "grad_norm": 0.47073787450790405, "learning_rate": 0.00012353923205342236, "loss": 1.6581, "step": 1151 }, { "epoch": 0.1693806285609263, "grad_norm": 0.44264334440231323, "learning_rate": 0.00012347245409015026, "loss": 1.6147, "step": 1152 }, { "epoch": 0.16952766035655212, "grad_norm": 0.4856738746166229, "learning_rate": 0.00012340567612687813, "loss": 1.0869, "step": 1153 }, { "epoch": 0.1696746921521779, "grad_norm": 0.6559334397315979, "learning_rate": 0.00012333889816360603, "loss": 1.1327, "step": 1154 }, { "epoch": 0.1698217239478037, "grad_norm": 0.5587737560272217, "learning_rate": 0.0001232721202003339, "loss": 1.4077, "step": 1155 }, { "epoch": 0.1699687557434295, "grad_norm": 0.594240128993988, "learning_rate": 0.00012320534223706178, "loss": 1.4082, "step": 1156 }, { "epoch": 0.17011578753905532, "grad_norm": 0.5311996340751648, "learning_rate": 0.00012313856427378965, "loss": 1.4002, "step": 1157 }, { "epoch": 0.17026281933468113, "grad_norm": 0.8503538966178894, "learning_rate": 0.00012307178631051755, "loss": 1.1519, "step": 1158 }, { "epoch": 0.17040985113030693, "grad_norm": 0.4125695824623108, "learning_rate": 0.00012300500834724543, "loss": 1.7434, "step": 1159 }, { "epoch": 0.17055688292593274, "grad_norm": 0.5790399312973022, "learning_rate": 0.0001229382303839733, "loss": 1.3714, "step": 1160 }, { "epoch": 0.17070391472155855, "grad_norm": 0.5874505639076233, "learning_rate": 0.00012287145242070117, "loss": 1.4271, "step": 1161 }, { "epoch": 0.17085094651718435, "grad_norm": 0.47998887300491333, "learning_rate": 0.00012280467445742905, "loss": 1.2726, "step": 1162 }, { "epoch": 0.17099797831281013, "grad_norm": 0.4660187363624573, "learning_rate": 0.00012273789649415692, "loss": 1.267, "step": 1163 }, { "epoch": 0.17114501010843594, "grad_norm": 0.6771066188812256, "learning_rate": 0.00012267111853088482, "loss": 0.9103, "step": 1164 }, { "epoch": 0.17129204190406175, "grad_norm": 0.5057253241539001, "learning_rate": 0.0001226043405676127, "loss": 1.1383, "step": 1165 }, { "epoch": 0.17143907369968756, "grad_norm": 0.7736232876777649, "learning_rate": 0.00012253756260434057, "loss": 1.2782, "step": 1166 }, { "epoch": 0.17158610549531336, "grad_norm": 0.5966457724571228, "learning_rate": 0.00012247078464106844, "loss": 1.1117, "step": 1167 }, { "epoch": 0.17173313729093917, "grad_norm": 0.6213434338569641, "learning_rate": 0.0001224040066777963, "loss": 1.0376, "step": 1168 }, { "epoch": 0.17188016908656498, "grad_norm": 0.566863477230072, "learning_rate": 0.0001223372287145242, "loss": 1.1063, "step": 1169 }, { "epoch": 0.17202720088219078, "grad_norm": 0.5134406089782715, "learning_rate": 0.00012227045075125209, "loss": 1.2303, "step": 1170 }, { "epoch": 0.1721742326778166, "grad_norm": 0.6940564513206482, "learning_rate": 0.00012220367278797999, "loss": 1.0861, "step": 1171 }, { "epoch": 0.17232126447344237, "grad_norm": 0.7415386438369751, "learning_rate": 0.00012213689482470786, "loss": 0.8356, "step": 1172 }, { "epoch": 0.17246829626906818, "grad_norm": 0.5437687635421753, "learning_rate": 0.00012207011686143572, "loss": 1.373, "step": 1173 }, { "epoch": 0.17261532806469398, "grad_norm": 0.4693496823310852, "learning_rate": 0.00012200333889816362, "loss": 1.2002, "step": 1174 }, { "epoch": 0.1727623598603198, "grad_norm": 0.6234175562858582, "learning_rate": 0.00012193656093489149, "loss": 1.1587, "step": 1175 }, { "epoch": 0.1729093916559456, "grad_norm": 0.6088576912879944, "learning_rate": 0.00012186978297161938, "loss": 1.3338, "step": 1176 }, { "epoch": 0.1730564234515714, "grad_norm": 0.43778562545776367, "learning_rate": 0.00012180300500834725, "loss": 1.1186, "step": 1177 }, { "epoch": 0.1732034552471972, "grad_norm": 0.5359873175621033, "learning_rate": 0.00012173622704507512, "loss": 1.3824, "step": 1178 }, { "epoch": 0.17335048704282302, "grad_norm": 0.5481301546096802, "learning_rate": 0.00012166944908180303, "loss": 1.0796, "step": 1179 }, { "epoch": 0.17349751883844883, "grad_norm": 0.5278254747390747, "learning_rate": 0.0001216026711185309, "loss": 1.2654, "step": 1180 }, { "epoch": 0.1736445506340746, "grad_norm": 0.5637503862380981, "learning_rate": 0.00012153589315525877, "loss": 0.9801, "step": 1181 }, { "epoch": 0.17379158242970041, "grad_norm": 0.4416888356208801, "learning_rate": 0.00012146911519198664, "loss": 1.5166, "step": 1182 }, { "epoch": 0.17393861422532622, "grad_norm": 0.5327706933021545, "learning_rate": 0.00012140233722871452, "loss": 1.1444, "step": 1183 }, { "epoch": 0.17408564602095203, "grad_norm": 0.4670265018939972, "learning_rate": 0.0001213355592654424, "loss": 1.7004, "step": 1184 }, { "epoch": 0.17423267781657784, "grad_norm": 0.6740869879722595, "learning_rate": 0.00012126878130217029, "loss": 1.3123, "step": 1185 }, { "epoch": 0.17437970961220364, "grad_norm": 0.4767744541168213, "learning_rate": 0.00012120200333889818, "loss": 1.5508, "step": 1186 }, { "epoch": 0.17452674140782945, "grad_norm": 0.49833595752716064, "learning_rate": 0.00012113522537562605, "loss": 1.123, "step": 1187 }, { "epoch": 0.17467377320345526, "grad_norm": 0.7638326287269592, "learning_rate": 0.00012106844741235392, "loss": 1.4892, "step": 1188 }, { "epoch": 0.17482080499908106, "grad_norm": 0.49769920110702515, "learning_rate": 0.0001210016694490818, "loss": 1.5552, "step": 1189 }, { "epoch": 0.17496783679470684, "grad_norm": 0.4968523681163788, "learning_rate": 0.0001209348914858097, "loss": 1.24, "step": 1190 }, { "epoch": 0.17511486859033265, "grad_norm": 0.45984214544296265, "learning_rate": 0.00012086811352253757, "loss": 1.5405, "step": 1191 }, { "epoch": 0.17526190038595846, "grad_norm": 0.5157444477081299, "learning_rate": 0.00012080133555926544, "loss": 1.1597, "step": 1192 }, { "epoch": 0.17540893218158427, "grad_norm": 0.7023390531539917, "learning_rate": 0.00012073455759599333, "loss": 1.0176, "step": 1193 }, { "epoch": 0.17555596397721007, "grad_norm": 0.5222766399383545, "learning_rate": 0.0001206677796327212, "loss": 1.014, "step": 1194 }, { "epoch": 0.17570299577283588, "grad_norm": 0.475521445274353, "learning_rate": 0.0001206010016694491, "loss": 1.4873, "step": 1195 }, { "epoch": 0.1758500275684617, "grad_norm": 0.4973902106285095, "learning_rate": 0.00012053422370617698, "loss": 1.4886, "step": 1196 }, { "epoch": 0.1759970593640875, "grad_norm": 0.4890851676464081, "learning_rate": 0.00012046744574290485, "loss": 1.2668, "step": 1197 }, { "epoch": 0.1761440911597133, "grad_norm": 0.5010033845901489, "learning_rate": 0.00012040066777963272, "loss": 1.261, "step": 1198 }, { "epoch": 0.17629112295533908, "grad_norm": 0.7270867824554443, "learning_rate": 0.0001203338898163606, "loss": 1.112, "step": 1199 }, { "epoch": 0.1764381547509649, "grad_norm": 0.528663158416748, "learning_rate": 0.00012026711185308848, "loss": 1.0082, "step": 1200 }, { "epoch": 0.1765851865465907, "grad_norm": 0.4572063088417053, "learning_rate": 0.00012020033388981637, "loss": 1.4181, "step": 1201 }, { "epoch": 0.1767322183422165, "grad_norm": 0.3905852735042572, "learning_rate": 0.00012013355592654426, "loss": 1.5227, "step": 1202 }, { "epoch": 0.1768792501378423, "grad_norm": 0.5379745364189148, "learning_rate": 0.00012006677796327213, "loss": 1.3941, "step": 1203 }, { "epoch": 0.17702628193346812, "grad_norm": 0.5934731960296631, "learning_rate": 0.00012, "loss": 1.3318, "step": 1204 }, { "epoch": 0.17717331372909392, "grad_norm": 0.5675480961799622, "learning_rate": 0.00011993322203672788, "loss": 1.3179, "step": 1205 }, { "epoch": 0.17732034552471973, "grad_norm": 0.48539409041404724, "learning_rate": 0.00011986644407345578, "loss": 1.9183, "step": 1206 }, { "epoch": 0.17746737732034554, "grad_norm": 0.49400487542152405, "learning_rate": 0.00011979966611018365, "loss": 1.2673, "step": 1207 }, { "epoch": 0.17761440911597132, "grad_norm": 0.5799417495727539, "learning_rate": 0.00011973288814691152, "loss": 1.3248, "step": 1208 }, { "epoch": 0.17776144091159712, "grad_norm": 0.47683843970298767, "learning_rate": 0.0001196661101836394, "loss": 1.4498, "step": 1209 }, { "epoch": 0.17790847270722293, "grad_norm": 0.7713230848312378, "learning_rate": 0.00011959933222036728, "loss": 1.1253, "step": 1210 }, { "epoch": 0.17805550450284874, "grad_norm": 0.5217681527137756, "learning_rate": 0.00011953255425709517, "loss": 1.2639, "step": 1211 }, { "epoch": 0.17820253629847455, "grad_norm": 0.5968557000160217, "learning_rate": 0.00011946577629382306, "loss": 1.2692, "step": 1212 }, { "epoch": 0.17834956809410035, "grad_norm": 0.7075598239898682, "learning_rate": 0.00011939899833055093, "loss": 1.2021, "step": 1213 }, { "epoch": 0.17849659988972616, "grad_norm": 0.5303223133087158, "learning_rate": 0.0001193322203672788, "loss": 1.1741, "step": 1214 }, { "epoch": 0.17864363168535197, "grad_norm": 0.6452450156211853, "learning_rate": 0.00011926544240400668, "loss": 1.1632, "step": 1215 }, { "epoch": 0.17879066348097777, "grad_norm": 0.6518301367759705, "learning_rate": 0.00011919866444073455, "loss": 1.071, "step": 1216 }, { "epoch": 0.17893769527660355, "grad_norm": 0.5585775971412659, "learning_rate": 0.00011913188647746245, "loss": 1.1991, "step": 1217 }, { "epoch": 0.17908472707222936, "grad_norm": 0.40637078881263733, "learning_rate": 0.00011906510851419032, "loss": 1.4011, "step": 1218 }, { "epoch": 0.17923175886785517, "grad_norm": 0.5704571008682251, "learning_rate": 0.00011899833055091821, "loss": 1.3036, "step": 1219 }, { "epoch": 0.17937879066348097, "grad_norm": 0.5434959530830383, "learning_rate": 0.00011893155258764608, "loss": 0.9562, "step": 1220 }, { "epoch": 0.17952582245910678, "grad_norm": 0.658091127872467, "learning_rate": 0.00011886477462437396, "loss": 1.0837, "step": 1221 }, { "epoch": 0.1796728542547326, "grad_norm": 0.5435907244682312, "learning_rate": 0.00011879799666110186, "loss": 1.1511, "step": 1222 }, { "epoch": 0.1798198860503584, "grad_norm": 0.7719695568084717, "learning_rate": 0.00011873121869782973, "loss": 1.223, "step": 1223 }, { "epoch": 0.1799669178459842, "grad_norm": 0.6551958322525024, "learning_rate": 0.0001186644407345576, "loss": 1.2745, "step": 1224 }, { "epoch": 0.18011394964161, "grad_norm": 0.6359519958496094, "learning_rate": 0.00011859766277128547, "loss": 1.1361, "step": 1225 }, { "epoch": 0.1802609814372358, "grad_norm": 0.708357036113739, "learning_rate": 0.00011853088480801335, "loss": 1.1375, "step": 1226 }, { "epoch": 0.1804080132328616, "grad_norm": 0.5175237059593201, "learning_rate": 0.00011846410684474125, "loss": 1.1591, "step": 1227 }, { "epoch": 0.1805550450284874, "grad_norm": 0.6814719438552856, "learning_rate": 0.00011839732888146912, "loss": 1.3331, "step": 1228 }, { "epoch": 0.1807020768241132, "grad_norm": 0.7352350354194641, "learning_rate": 0.00011833055091819701, "loss": 1.2295, "step": 1229 }, { "epoch": 0.18084910861973902, "grad_norm": 0.6147286891937256, "learning_rate": 0.00011826377295492488, "loss": 1.0048, "step": 1230 }, { "epoch": 0.18099614041536483, "grad_norm": 0.6259649395942688, "learning_rate": 0.00011819699499165275, "loss": 1.0858, "step": 1231 }, { "epoch": 0.18114317221099063, "grad_norm": 0.5037545561790466, "learning_rate": 0.00011813021702838063, "loss": 1.2415, "step": 1232 }, { "epoch": 0.18129020400661644, "grad_norm": 0.6225839257240295, "learning_rate": 0.00011806343906510853, "loss": 1.3513, "step": 1233 }, { "epoch": 0.18143723580224225, "grad_norm": 0.5801997184753418, "learning_rate": 0.0001179966611018364, "loss": 1.3931, "step": 1234 }, { "epoch": 0.18158426759786803, "grad_norm": 0.5562853217124939, "learning_rate": 0.00011792988313856427, "loss": 1.2615, "step": 1235 }, { "epoch": 0.18173129939349383, "grad_norm": 0.6609338521957397, "learning_rate": 0.00011786310517529216, "loss": 1.4688, "step": 1236 }, { "epoch": 0.18187833118911964, "grad_norm": 0.5495307445526123, "learning_rate": 0.00011779632721202003, "loss": 1.5071, "step": 1237 }, { "epoch": 0.18202536298474545, "grad_norm": 0.5168755054473877, "learning_rate": 0.00011772954924874793, "loss": 1.35, "step": 1238 }, { "epoch": 0.18217239478037126, "grad_norm": 0.598572313785553, "learning_rate": 0.00011766277128547581, "loss": 0.8895, "step": 1239 }, { "epoch": 0.18231942657599706, "grad_norm": 0.555892825126648, "learning_rate": 0.00011759599332220368, "loss": 1.4415, "step": 1240 }, { "epoch": 0.18246645837162287, "grad_norm": 0.491562157869339, "learning_rate": 0.00011752921535893155, "loss": 1.4017, "step": 1241 }, { "epoch": 0.18261349016724868, "grad_norm": 0.6731282472610474, "learning_rate": 0.00011746243739565943, "loss": 1.3867, "step": 1242 }, { "epoch": 0.18276052196287448, "grad_norm": 0.38318315148353577, "learning_rate": 0.00011739565943238733, "loss": 1.48, "step": 1243 }, { "epoch": 0.18290755375850026, "grad_norm": 0.39496612548828125, "learning_rate": 0.0001173288814691152, "loss": 1.2355, "step": 1244 }, { "epoch": 0.18305458555412607, "grad_norm": 0.6515347361564636, "learning_rate": 0.00011726210350584307, "loss": 1.1271, "step": 1245 }, { "epoch": 0.18320161734975188, "grad_norm": 0.5185102820396423, "learning_rate": 0.00011719532554257096, "loss": 1.275, "step": 1246 }, { "epoch": 0.18334864914537768, "grad_norm": 0.5619149208068848, "learning_rate": 0.00011712854757929883, "loss": 1.3542, "step": 1247 }, { "epoch": 0.1834956809410035, "grad_norm": 0.5040785074234009, "learning_rate": 0.0001170617696160267, "loss": 1.1582, "step": 1248 }, { "epoch": 0.1836427127366293, "grad_norm": 1.1896318197250366, "learning_rate": 0.0001169949916527546, "loss": 1.2223, "step": 1249 }, { "epoch": 0.1837897445322551, "grad_norm": 0.6427366137504578, "learning_rate": 0.00011692821368948248, "loss": 1.2621, "step": 1250 }, { "epoch": 0.1839367763278809, "grad_norm": 0.5269393920898438, "learning_rate": 0.00011686143572621035, "loss": 1.2634, "step": 1251 }, { "epoch": 0.18408380812350672, "grad_norm": 0.6794466972351074, "learning_rate": 0.00011679465776293823, "loss": 1.1839, "step": 1252 }, { "epoch": 0.1842308399191325, "grad_norm": 0.6739030480384827, "learning_rate": 0.00011672787979966611, "loss": 1.2059, "step": 1253 }, { "epoch": 0.1843778717147583, "grad_norm": 0.5035480260848999, "learning_rate": 0.000116661101836394, "loss": 1.5413, "step": 1254 }, { "epoch": 0.18452490351038411, "grad_norm": 0.5343230962753296, "learning_rate": 0.00011659432387312189, "loss": 1.3174, "step": 1255 }, { "epoch": 0.18467193530600992, "grad_norm": 0.7894048690795898, "learning_rate": 0.00011652754590984976, "loss": 1.2537, "step": 1256 }, { "epoch": 0.18481896710163573, "grad_norm": 0.5969638824462891, "learning_rate": 0.00011646076794657763, "loss": 1.0677, "step": 1257 }, { "epoch": 0.18496599889726154, "grad_norm": 0.6597347855567932, "learning_rate": 0.0001163939899833055, "loss": 1.3195, "step": 1258 }, { "epoch": 0.18511303069288734, "grad_norm": 0.6149417757987976, "learning_rate": 0.0001163272120200334, "loss": 1.1772, "step": 1259 }, { "epoch": 0.18526006248851315, "grad_norm": 0.6688962578773499, "learning_rate": 0.00011626043405676128, "loss": 1.0228, "step": 1260 }, { "epoch": 0.18540709428413896, "grad_norm": 0.6903858184814453, "learning_rate": 0.00011619365609348915, "loss": 1.0709, "step": 1261 }, { "epoch": 0.18555412607976474, "grad_norm": 0.6539767384529114, "learning_rate": 0.00011612687813021703, "loss": 0.9346, "step": 1262 }, { "epoch": 0.18570115787539054, "grad_norm": 0.6902507543563843, "learning_rate": 0.00011606010016694491, "loss": 1.095, "step": 1263 }, { "epoch": 0.18584818967101635, "grad_norm": 0.5091347098350525, "learning_rate": 0.00011599332220367279, "loss": 1.181, "step": 1264 }, { "epoch": 0.18599522146664216, "grad_norm": 0.5918865203857422, "learning_rate": 0.00011592654424040069, "loss": 1.3808, "step": 1265 }, { "epoch": 0.18614225326226796, "grad_norm": 0.5882330536842346, "learning_rate": 0.00011585976627712856, "loss": 1.4591, "step": 1266 }, { "epoch": 0.18628928505789377, "grad_norm": 0.48394352197647095, "learning_rate": 0.00011579298831385643, "loss": 1.4503, "step": 1267 }, { "epoch": 0.18643631685351958, "grad_norm": 0.5273301005363464, "learning_rate": 0.0001157262103505843, "loss": 1.2402, "step": 1268 }, { "epoch": 0.1865833486491454, "grad_norm": 0.4930548071861267, "learning_rate": 0.00011565943238731218, "loss": 1.1725, "step": 1269 }, { "epoch": 0.1867303804447712, "grad_norm": 0.5125686526298523, "learning_rate": 0.00011559265442404008, "loss": 1.2238, "step": 1270 }, { "epoch": 0.18687741224039697, "grad_norm": 0.5540836453437805, "learning_rate": 0.00011552587646076795, "loss": 1.0948, "step": 1271 }, { "epoch": 0.18702444403602278, "grad_norm": 0.8548254370689392, "learning_rate": 0.00011545909849749584, "loss": 1.0685, "step": 1272 }, { "epoch": 0.1871714758316486, "grad_norm": 0.5901070237159729, "learning_rate": 0.00011539232053422371, "loss": 1.3175, "step": 1273 }, { "epoch": 0.1873185076272744, "grad_norm": 0.44147756695747375, "learning_rate": 0.00011532554257095158, "loss": 1.2821, "step": 1274 }, { "epoch": 0.1874655394229002, "grad_norm": 0.5040634274482727, "learning_rate": 0.00011525876460767948, "loss": 1.676, "step": 1275 }, { "epoch": 0.187612571218526, "grad_norm": 0.46972373127937317, "learning_rate": 0.00011519198664440736, "loss": 1.4928, "step": 1276 }, { "epoch": 0.18775960301415182, "grad_norm": 0.6661924719810486, "learning_rate": 0.00011512520868113523, "loss": 0.9938, "step": 1277 }, { "epoch": 0.18790663480977762, "grad_norm": 0.7665374875068665, "learning_rate": 0.0001150584307178631, "loss": 1.6374, "step": 1278 }, { "epoch": 0.18805366660540343, "grad_norm": 0.8050884008407593, "learning_rate": 0.00011499165275459098, "loss": 1.3809, "step": 1279 }, { "epoch": 0.1882006984010292, "grad_norm": 0.5744901299476624, "learning_rate": 0.00011492487479131886, "loss": 1.0992, "step": 1280 }, { "epoch": 0.18834773019665502, "grad_norm": 0.5724813342094421, "learning_rate": 0.00011485809682804675, "loss": 1.3525, "step": 1281 }, { "epoch": 0.18849476199228082, "grad_norm": 0.5295944809913635, "learning_rate": 0.00011479131886477464, "loss": 1.128, "step": 1282 }, { "epoch": 0.18864179378790663, "grad_norm": 0.719225287437439, "learning_rate": 0.00011472454090150251, "loss": 1.3532, "step": 1283 }, { "epoch": 0.18878882558353244, "grad_norm": 0.4361250400543213, "learning_rate": 0.00011465776293823038, "loss": 1.8268, "step": 1284 }, { "epoch": 0.18893585737915825, "grad_norm": 0.5057854056358337, "learning_rate": 0.00011459098497495826, "loss": 1.068, "step": 1285 }, { "epoch": 0.18908288917478405, "grad_norm": 0.5487648248672485, "learning_rate": 0.00011452420701168616, "loss": 0.9463, "step": 1286 }, { "epoch": 0.18922992097040986, "grad_norm": 0.6874895691871643, "learning_rate": 0.00011445742904841403, "loss": 0.9599, "step": 1287 }, { "epoch": 0.18937695276603567, "grad_norm": 0.5414902567863464, "learning_rate": 0.0001143906510851419, "loss": 1.1536, "step": 1288 }, { "epoch": 0.18952398456166145, "grad_norm": 0.4598829448223114, "learning_rate": 0.00011432387312186979, "loss": 1.5194, "step": 1289 }, { "epoch": 0.18967101635728725, "grad_norm": 0.573000967502594, "learning_rate": 0.00011425709515859766, "loss": 1.4332, "step": 1290 }, { "epoch": 0.18981804815291306, "grad_norm": 0.7883924841880798, "learning_rate": 0.00011419031719532556, "loss": 0.8516, "step": 1291 }, { "epoch": 0.18996507994853887, "grad_norm": 0.5924862623214722, "learning_rate": 0.00011412353923205344, "loss": 0.9863, "step": 1292 }, { "epoch": 0.19011211174416467, "grad_norm": 0.556874692440033, "learning_rate": 0.00011405676126878131, "loss": 1.2666, "step": 1293 }, { "epoch": 0.19025914353979048, "grad_norm": 0.8738967180252075, "learning_rate": 0.00011398998330550918, "loss": 1.2684, "step": 1294 }, { "epoch": 0.1904061753354163, "grad_norm": 0.37248101830482483, "learning_rate": 0.00011392320534223706, "loss": 1.6203, "step": 1295 }, { "epoch": 0.1905532071310421, "grad_norm": 0.5726788640022278, "learning_rate": 0.00011385642737896493, "loss": 1.0608, "step": 1296 }, { "epoch": 0.1907002389266679, "grad_norm": 0.6718825697898865, "learning_rate": 0.00011378964941569283, "loss": 1.1889, "step": 1297 }, { "epoch": 0.19084727072229368, "grad_norm": 0.656559944152832, "learning_rate": 0.0001137228714524207, "loss": 1.1414, "step": 1298 }, { "epoch": 0.1909943025179195, "grad_norm": 0.47017186880111694, "learning_rate": 0.00011365609348914859, "loss": 1.739, "step": 1299 }, { "epoch": 0.1911413343135453, "grad_norm": 0.6589781641960144, "learning_rate": 0.00011358931552587646, "loss": 0.8992, "step": 1300 }, { "epoch": 0.1912883661091711, "grad_norm": 0.7519534230232239, "learning_rate": 0.00011352253756260434, "loss": 1.2868, "step": 1301 }, { "epoch": 0.1914353979047969, "grad_norm": 0.6856164932250977, "learning_rate": 0.00011345575959933224, "loss": 1.1425, "step": 1302 }, { "epoch": 0.19158242970042272, "grad_norm": 0.5937598943710327, "learning_rate": 0.00011338898163606011, "loss": 0.981, "step": 1303 }, { "epoch": 0.19172946149604853, "grad_norm": 0.5290572047233582, "learning_rate": 0.00011332220367278798, "loss": 1.0691, "step": 1304 }, { "epoch": 0.19187649329167433, "grad_norm": 0.5258932709693909, "learning_rate": 0.00011325542570951586, "loss": 1.4023, "step": 1305 }, { "epoch": 0.19202352508730014, "grad_norm": 0.5423656105995178, "learning_rate": 0.00011318864774624374, "loss": 1.0567, "step": 1306 }, { "epoch": 0.19217055688292592, "grad_norm": 0.3981834352016449, "learning_rate": 0.00011312186978297163, "loss": 1.853, "step": 1307 }, { "epoch": 0.19231758867855173, "grad_norm": 0.6948246955871582, "learning_rate": 0.00011305509181969952, "loss": 1.1191, "step": 1308 }, { "epoch": 0.19246462047417753, "grad_norm": 0.6509448289871216, "learning_rate": 0.00011298831385642739, "loss": 0.8114, "step": 1309 }, { "epoch": 0.19261165226980334, "grad_norm": 0.5775894522666931, "learning_rate": 0.00011292153589315526, "loss": 1.5418, "step": 1310 }, { "epoch": 0.19275868406542915, "grad_norm": 0.5190936326980591, "learning_rate": 0.00011285475792988314, "loss": 1.28, "step": 1311 }, { "epoch": 0.19290571586105495, "grad_norm": 0.5893734693527222, "learning_rate": 0.00011278797996661104, "loss": 1.0854, "step": 1312 }, { "epoch": 0.19305274765668076, "grad_norm": 0.6321806907653809, "learning_rate": 0.00011272120200333891, "loss": 1.0772, "step": 1313 }, { "epoch": 0.19319977945230657, "grad_norm": 0.41487735509872437, "learning_rate": 0.00011265442404006678, "loss": 0.8743, "step": 1314 }, { "epoch": 0.19334681124793238, "grad_norm": 0.4956062436103821, "learning_rate": 0.00011258764607679465, "loss": 1.368, "step": 1315 }, { "epoch": 0.19349384304355816, "grad_norm": 0.5012564063072205, "learning_rate": 0.00011252086811352254, "loss": 1.336, "step": 1316 }, { "epoch": 0.19364087483918396, "grad_norm": 0.3993220329284668, "learning_rate": 0.00011245409015025041, "loss": 1.156, "step": 1317 }, { "epoch": 0.19378790663480977, "grad_norm": 0.6414085030555725, "learning_rate": 0.00011238731218697832, "loss": 1.1502, "step": 1318 }, { "epoch": 0.19393493843043558, "grad_norm": 0.5336965918540955, "learning_rate": 0.00011232053422370619, "loss": 1.1942, "step": 1319 }, { "epoch": 0.19408197022606138, "grad_norm": 0.5665513873100281, "learning_rate": 0.00011225375626043406, "loss": 1.1169, "step": 1320 }, { "epoch": 0.1942290020216872, "grad_norm": 0.5210825204849243, "learning_rate": 0.00011218697829716193, "loss": 1.2875, "step": 1321 }, { "epoch": 0.194376033817313, "grad_norm": 0.6049639582633972, "learning_rate": 0.00011212020033388981, "loss": 1.3016, "step": 1322 }, { "epoch": 0.1945230656129388, "grad_norm": 0.8444863557815552, "learning_rate": 0.00011205342237061771, "loss": 1.2669, "step": 1323 }, { "epoch": 0.1946700974085646, "grad_norm": 0.6765125393867493, "learning_rate": 0.00011198664440734558, "loss": 1.3361, "step": 1324 }, { "epoch": 0.1948171292041904, "grad_norm": 0.5615106225013733, "learning_rate": 0.00011191986644407347, "loss": 1.5167, "step": 1325 }, { "epoch": 0.1949641609998162, "grad_norm": 0.5011158585548401, "learning_rate": 0.00011185308848080134, "loss": 1.3603, "step": 1326 }, { "epoch": 0.195111192795442, "grad_norm": 0.4778006672859192, "learning_rate": 0.00011178631051752921, "loss": 1.2905, "step": 1327 }, { "epoch": 0.1952582245910678, "grad_norm": 0.43544769287109375, "learning_rate": 0.00011171953255425711, "loss": 1.755, "step": 1328 }, { "epoch": 0.19540525638669362, "grad_norm": 0.8488903045654297, "learning_rate": 0.00011165275459098499, "loss": 1.0117, "step": 1329 }, { "epoch": 0.19555228818231943, "grad_norm": 0.5459789633750916, "learning_rate": 0.00011158597662771286, "loss": 1.2912, "step": 1330 }, { "epoch": 0.19569931997794524, "grad_norm": 0.5740692615509033, "learning_rate": 0.00011151919866444073, "loss": 1.1936, "step": 1331 }, { "epoch": 0.19584635177357104, "grad_norm": 0.5875141024589539, "learning_rate": 0.00011145242070116862, "loss": 1.018, "step": 1332 }, { "epoch": 0.19599338356919685, "grad_norm": 0.4285443127155304, "learning_rate": 0.0001113856427378965, "loss": 1.1165, "step": 1333 }, { "epoch": 0.19614041536482266, "grad_norm": 0.4854544699192047, "learning_rate": 0.0001113188647746244, "loss": 1.2183, "step": 1334 }, { "epoch": 0.19628744716044844, "grad_norm": 0.6221902966499329, "learning_rate": 0.00011125208681135227, "loss": 1.109, "step": 1335 }, { "epoch": 0.19643447895607424, "grad_norm": 0.5218989849090576, "learning_rate": 0.00011118530884808014, "loss": 1.1131, "step": 1336 }, { "epoch": 0.19658151075170005, "grad_norm": 0.6310389041900635, "learning_rate": 0.00011111853088480801, "loss": 1.2605, "step": 1337 }, { "epoch": 0.19672854254732586, "grad_norm": 0.7327041625976562, "learning_rate": 0.00011105175292153589, "loss": 0.9666, "step": 1338 }, { "epoch": 0.19687557434295166, "grad_norm": 0.6710737347602844, "learning_rate": 0.00011098497495826379, "loss": 1.0553, "step": 1339 }, { "epoch": 0.19702260613857747, "grad_norm": 0.6473710536956787, "learning_rate": 0.00011091819699499166, "loss": 1.0541, "step": 1340 }, { "epoch": 0.19716963793420328, "grad_norm": 0.4750458002090454, "learning_rate": 0.00011085141903171953, "loss": 1.8032, "step": 1341 }, { "epoch": 0.1973166697298291, "grad_norm": 0.5676790475845337, "learning_rate": 0.00011078464106844742, "loss": 1.1733, "step": 1342 }, { "epoch": 0.1974637015254549, "grad_norm": 0.43958574533462524, "learning_rate": 0.00011071786310517529, "loss": 1.6542, "step": 1343 }, { "epoch": 0.19761073332108067, "grad_norm": 0.5527573823928833, "learning_rate": 0.0001106510851419032, "loss": 1.0577, "step": 1344 }, { "epoch": 0.19775776511670648, "grad_norm": 0.6104844808578491, "learning_rate": 0.00011058430717863107, "loss": 0.9464, "step": 1345 }, { "epoch": 0.1979047969123323, "grad_norm": 0.4456753730773926, "learning_rate": 0.00011051752921535894, "loss": 1.1378, "step": 1346 }, { "epoch": 0.1980518287079581, "grad_norm": 0.5965105891227722, "learning_rate": 0.00011045075125208681, "loss": 1.0064, "step": 1347 }, { "epoch": 0.1981988605035839, "grad_norm": 0.623145580291748, "learning_rate": 0.00011038397328881469, "loss": 1.1097, "step": 1348 }, { "epoch": 0.1983458922992097, "grad_norm": 0.6386638283729553, "learning_rate": 0.00011031719532554257, "loss": 1.138, "step": 1349 }, { "epoch": 0.19849292409483552, "grad_norm": 0.5800279974937439, "learning_rate": 0.00011025041736227046, "loss": 1.0969, "step": 1350 }, { "epoch": 0.19863995589046132, "grad_norm": 0.6445541381835938, "learning_rate": 0.00011018363939899835, "loss": 1.2772, "step": 1351 }, { "epoch": 0.19878698768608713, "grad_norm": 0.5820457935333252, "learning_rate": 0.00011011686143572622, "loss": 1.0877, "step": 1352 }, { "epoch": 0.1989340194817129, "grad_norm": 0.5991929769515991, "learning_rate": 0.00011005008347245409, "loss": 1.2047, "step": 1353 }, { "epoch": 0.19908105127733872, "grad_norm": 0.6709920167922974, "learning_rate": 0.00010998330550918197, "loss": 1.1724, "step": 1354 }, { "epoch": 0.19922808307296452, "grad_norm": 0.7189682126045227, "learning_rate": 0.00010991652754590987, "loss": 1.2473, "step": 1355 }, { "epoch": 0.19937511486859033, "grad_norm": 0.6283499002456665, "learning_rate": 0.00010984974958263774, "loss": 1.0946, "step": 1356 }, { "epoch": 0.19952214666421614, "grad_norm": 0.6136909127235413, "learning_rate": 0.00010978297161936561, "loss": 1.3151, "step": 1357 }, { "epoch": 0.19966917845984195, "grad_norm": 0.5018932223320007, "learning_rate": 0.00010971619365609349, "loss": 1.6498, "step": 1358 }, { "epoch": 0.19981621025546775, "grad_norm": 0.5600033402442932, "learning_rate": 0.00010964941569282137, "loss": 1.3716, "step": 1359 }, { "epoch": 0.19996324205109356, "grad_norm": 0.49299564957618713, "learning_rate": 0.00010958263772954926, "loss": 0.8417, "step": 1360 }, { "epoch": 0.20011027384671937, "grad_norm": 0.6016470193862915, "learning_rate": 0.00010951585976627715, "loss": 1.0861, "step": 1361 }, { "epoch": 0.20025730564234515, "grad_norm": 0.6360546946525574, "learning_rate": 0.00010944908180300502, "loss": 1.26, "step": 1362 }, { "epoch": 0.20040433743797095, "grad_norm": 0.6016337871551514, "learning_rate": 0.00010938230383973289, "loss": 0.9041, "step": 1363 }, { "epoch": 0.20055136923359676, "grad_norm": 0.6311550140380859, "learning_rate": 0.00010931552587646076, "loss": 1.0035, "step": 1364 }, { "epoch": 0.20069840102922257, "grad_norm": 0.6036110520362854, "learning_rate": 0.00010924874791318864, "loss": 1.0173, "step": 1365 }, { "epoch": 0.20084543282484837, "grad_norm": 0.4544641971588135, "learning_rate": 0.00010918196994991654, "loss": 1.8338, "step": 1366 }, { "epoch": 0.20099246462047418, "grad_norm": 0.5990464091300964, "learning_rate": 0.00010911519198664441, "loss": 0.9318, "step": 1367 }, { "epoch": 0.2011394964161, "grad_norm": 0.5122262835502625, "learning_rate": 0.0001090484140233723, "loss": 1.3273, "step": 1368 }, { "epoch": 0.2012865282117258, "grad_norm": 0.617039144039154, "learning_rate": 0.00010898163606010017, "loss": 1.2915, "step": 1369 }, { "epoch": 0.2014335600073516, "grad_norm": 0.5768195986747742, "learning_rate": 0.00010891485809682804, "loss": 1.1736, "step": 1370 }, { "epoch": 0.20158059180297738, "grad_norm": 0.46325671672821045, "learning_rate": 0.00010884808013355594, "loss": 1.5813, "step": 1371 }, { "epoch": 0.2017276235986032, "grad_norm": 0.694161593914032, "learning_rate": 0.00010878130217028382, "loss": 1.207, "step": 1372 }, { "epoch": 0.201874655394229, "grad_norm": 0.7317565679550171, "learning_rate": 0.00010871452420701169, "loss": 1.2879, "step": 1373 }, { "epoch": 0.2020216871898548, "grad_norm": 0.6269108653068542, "learning_rate": 0.00010864774624373956, "loss": 0.782, "step": 1374 }, { "epoch": 0.2021687189854806, "grad_norm": 0.5519380569458008, "learning_rate": 0.00010858096828046744, "loss": 1.3482, "step": 1375 }, { "epoch": 0.20231575078110642, "grad_norm": 0.6951602101325989, "learning_rate": 0.00010851419031719534, "loss": 1.2011, "step": 1376 }, { "epoch": 0.20246278257673223, "grad_norm": 0.40396812558174133, "learning_rate": 0.00010844741235392321, "loss": 1.579, "step": 1377 }, { "epoch": 0.20260981437235803, "grad_norm": 0.6856363415718079, "learning_rate": 0.0001083806343906511, "loss": 1.5895, "step": 1378 }, { "epoch": 0.20275684616798384, "grad_norm": 0.5292795300483704, "learning_rate": 0.00010831385642737897, "loss": 1.3878, "step": 1379 }, { "epoch": 0.20290387796360962, "grad_norm": 0.5322324633598328, "learning_rate": 0.00010824707846410684, "loss": 1.6188, "step": 1380 }, { "epoch": 0.20305090975923543, "grad_norm": 0.5631840825080872, "learning_rate": 0.00010818030050083472, "loss": 1.2924, "step": 1381 }, { "epoch": 0.20319794155486123, "grad_norm": 0.7673616409301758, "learning_rate": 0.00010811352253756262, "loss": 1.0358, "step": 1382 }, { "epoch": 0.20334497335048704, "grad_norm": 0.4426136016845703, "learning_rate": 0.00010804674457429049, "loss": 1.1825, "step": 1383 }, { "epoch": 0.20349200514611285, "grad_norm": 0.47640693187713623, "learning_rate": 0.00010797996661101836, "loss": 1.306, "step": 1384 }, { "epoch": 0.20363903694173865, "grad_norm": 0.4135418236255646, "learning_rate": 0.00010791318864774625, "loss": 1.332, "step": 1385 }, { "epoch": 0.20378606873736446, "grad_norm": 0.5709166526794434, "learning_rate": 0.00010784641068447412, "loss": 1.2704, "step": 1386 }, { "epoch": 0.20393310053299027, "grad_norm": 0.5829437375068665, "learning_rate": 0.00010777963272120202, "loss": 0.8679, "step": 1387 }, { "epoch": 0.20408013232861608, "grad_norm": 0.4465857446193695, "learning_rate": 0.0001077128547579299, "loss": 1.3153, "step": 1388 }, { "epoch": 0.20422716412424186, "grad_norm": 0.384857177734375, "learning_rate": 0.00010764607679465777, "loss": 1.5058, "step": 1389 }, { "epoch": 0.20437419591986766, "grad_norm": 0.6209990978240967, "learning_rate": 0.00010757929883138564, "loss": 1.0635, "step": 1390 }, { "epoch": 0.20452122771549347, "grad_norm": 0.4262312650680542, "learning_rate": 0.00010751252086811352, "loss": 1.2643, "step": 1391 }, { "epoch": 0.20466825951111928, "grad_norm": 0.4951227009296417, "learning_rate": 0.00010744574290484142, "loss": 1.2025, "step": 1392 }, { "epoch": 0.20481529130674508, "grad_norm": 0.6629029512405396, "learning_rate": 0.00010737896494156929, "loss": 0.8636, "step": 1393 }, { "epoch": 0.2049623231023709, "grad_norm": 0.479524165391922, "learning_rate": 0.00010731218697829716, "loss": 1.7942, "step": 1394 }, { "epoch": 0.2051093548979967, "grad_norm": 0.6288361549377441, "learning_rate": 0.00010724540901502505, "loss": 1.33, "step": 1395 }, { "epoch": 0.2052563866936225, "grad_norm": 0.3803701400756836, "learning_rate": 0.00010717863105175292, "loss": 1.4027, "step": 1396 }, { "epoch": 0.2054034184892483, "grad_norm": 0.4020296633243561, "learning_rate": 0.0001071118530884808, "loss": 1.4783, "step": 1397 }, { "epoch": 0.2055504502848741, "grad_norm": 0.49085733294487, "learning_rate": 0.0001070450751252087, "loss": 1.3501, "step": 1398 }, { "epoch": 0.2056974820804999, "grad_norm": 0.5985439419746399, "learning_rate": 0.00010697829716193657, "loss": 1.4529, "step": 1399 }, { "epoch": 0.2058445138761257, "grad_norm": 0.44971129298210144, "learning_rate": 0.00010691151919866444, "loss": 1.3674, "step": 1400 }, { "epoch": 0.2059915456717515, "grad_norm": 0.4064272940158844, "learning_rate": 0.00010684474123539232, "loss": 1.347, "step": 1401 }, { "epoch": 0.20613857746737732, "grad_norm": 0.4543517827987671, "learning_rate": 0.0001067779632721202, "loss": 1.1085, "step": 1402 }, { "epoch": 0.20628560926300313, "grad_norm": 0.5459780097007751, "learning_rate": 0.00010671118530884809, "loss": 0.8187, "step": 1403 }, { "epoch": 0.20643264105862894, "grad_norm": 0.5268399119377136, "learning_rate": 0.00010664440734557598, "loss": 0.8015, "step": 1404 }, { "epoch": 0.20657967285425474, "grad_norm": 0.5893176198005676, "learning_rate": 0.00010657762938230385, "loss": 0.7785, "step": 1405 }, { "epoch": 0.20672670464988055, "grad_norm": 0.6806588768959045, "learning_rate": 0.00010651085141903172, "loss": 0.977, "step": 1406 }, { "epoch": 0.20687373644550633, "grad_norm": 0.502508282661438, "learning_rate": 0.0001064440734557596, "loss": 1.2477, "step": 1407 }, { "epoch": 0.20702076824113214, "grad_norm": 0.5332909226417542, "learning_rate": 0.0001063772954924875, "loss": 1.2022, "step": 1408 }, { "epoch": 0.20716780003675794, "grad_norm": 0.638706386089325, "learning_rate": 0.00010631051752921537, "loss": 0.9307, "step": 1409 }, { "epoch": 0.20731483183238375, "grad_norm": 0.5182225704193115, "learning_rate": 0.00010624373956594324, "loss": 1.3132, "step": 1410 }, { "epoch": 0.20746186362800956, "grad_norm": 0.6220089793205261, "learning_rate": 0.00010617696160267111, "loss": 1.1023, "step": 1411 }, { "epoch": 0.20760889542363536, "grad_norm": 0.5763335824012756, "learning_rate": 0.000106110183639399, "loss": 1.1903, "step": 1412 }, { "epoch": 0.20775592721926117, "grad_norm": 0.44323110580444336, "learning_rate": 0.00010604340567612687, "loss": 1.7113, "step": 1413 }, { "epoch": 0.20790295901488698, "grad_norm": 0.4095524549484253, "learning_rate": 0.00010597662771285477, "loss": 1.681, "step": 1414 }, { "epoch": 0.20804999081051279, "grad_norm": 0.5952494144439697, "learning_rate": 0.00010590984974958265, "loss": 1.0684, "step": 1415 }, { "epoch": 0.20819702260613857, "grad_norm": 0.6459909081459045, "learning_rate": 0.00010584307178631052, "loss": 1.0783, "step": 1416 }, { "epoch": 0.20834405440176437, "grad_norm": 0.4397619962692261, "learning_rate": 0.0001057762938230384, "loss": 1.6881, "step": 1417 }, { "epoch": 0.20849108619739018, "grad_norm": 0.5594138503074646, "learning_rate": 0.00010570951585976627, "loss": 1.198, "step": 1418 }, { "epoch": 0.208638117993016, "grad_norm": 0.5689383745193481, "learning_rate": 0.00010564273789649417, "loss": 1.3629, "step": 1419 }, { "epoch": 0.2087851497886418, "grad_norm": 0.4565649926662445, "learning_rate": 0.00010557595993322204, "loss": 1.6786, "step": 1420 }, { "epoch": 0.2089321815842676, "grad_norm": 0.5744721293449402, "learning_rate": 0.00010550918196994993, "loss": 1.2112, "step": 1421 }, { "epoch": 0.2090792133798934, "grad_norm": 0.7869272232055664, "learning_rate": 0.0001054424040066778, "loss": 0.9268, "step": 1422 }, { "epoch": 0.20922624517551922, "grad_norm": 0.5806440114974976, "learning_rate": 0.00010537562604340567, "loss": 1.1165, "step": 1423 }, { "epoch": 0.20937327697114502, "grad_norm": 0.41275402903556824, "learning_rate": 0.00010530884808013357, "loss": 1.3561, "step": 1424 }, { "epoch": 0.2095203087667708, "grad_norm": 0.47253841161727905, "learning_rate": 0.00010524207011686145, "loss": 1.3255, "step": 1425 }, { "epoch": 0.2096673405623966, "grad_norm": 0.5849193334579468, "learning_rate": 0.00010517529215358932, "loss": 1.4035, "step": 1426 }, { "epoch": 0.20981437235802242, "grad_norm": 0.5093145370483398, "learning_rate": 0.0001051085141903172, "loss": 1.4491, "step": 1427 }, { "epoch": 0.20996140415364822, "grad_norm": 0.9342120289802551, "learning_rate": 0.00010504173622704507, "loss": 1.0332, "step": 1428 }, { "epoch": 0.21010843594927403, "grad_norm": 0.6733828783035278, "learning_rate": 0.00010497495826377295, "loss": 1.6213, "step": 1429 }, { "epoch": 0.21025546774489984, "grad_norm": 0.7582636475563049, "learning_rate": 0.00010490818030050084, "loss": 1.3406, "step": 1430 }, { "epoch": 0.21040249954052564, "grad_norm": 0.5357018113136292, "learning_rate": 0.00010484140233722873, "loss": 0.9695, "step": 1431 }, { "epoch": 0.21054953133615145, "grad_norm": 0.4809349775314331, "learning_rate": 0.0001047746243739566, "loss": 0.9002, "step": 1432 }, { "epoch": 0.21069656313177726, "grad_norm": 0.7235410213470459, "learning_rate": 0.00010470784641068447, "loss": 0.9776, "step": 1433 }, { "epoch": 0.21084359492740304, "grad_norm": 0.4683224558830261, "learning_rate": 0.00010464106844741235, "loss": 1.0872, "step": 1434 }, { "epoch": 0.21099062672302885, "grad_norm": 0.7496999502182007, "learning_rate": 0.00010457429048414025, "loss": 1.1166, "step": 1435 }, { "epoch": 0.21113765851865465, "grad_norm": 0.48812007904052734, "learning_rate": 0.00010450751252086812, "loss": 1.4402, "step": 1436 }, { "epoch": 0.21128469031428046, "grad_norm": 0.6077368259429932, "learning_rate": 0.00010444073455759599, "loss": 1.0497, "step": 1437 }, { "epoch": 0.21143172210990627, "grad_norm": 0.8213294744491577, "learning_rate": 0.00010437395659432388, "loss": 1.1782, "step": 1438 }, { "epoch": 0.21157875390553207, "grad_norm": 0.47009503841400146, "learning_rate": 0.00010430717863105175, "loss": 1.2578, "step": 1439 }, { "epoch": 0.21172578570115788, "grad_norm": 0.47622379660606384, "learning_rate": 0.00010424040066777965, "loss": 1.5582, "step": 1440 }, { "epoch": 0.2118728174967837, "grad_norm": 0.5707595348358154, "learning_rate": 0.00010417362270450753, "loss": 1.0956, "step": 1441 }, { "epoch": 0.2120198492924095, "grad_norm": 0.5201634764671326, "learning_rate": 0.0001041068447412354, "loss": 1.2529, "step": 1442 }, { "epoch": 0.21216688108803528, "grad_norm": 0.6099618673324585, "learning_rate": 0.00010404006677796327, "loss": 1.3538, "step": 1443 }, { "epoch": 0.21231391288366108, "grad_norm": 0.5286110639572144, "learning_rate": 0.00010397328881469115, "loss": 1.6408, "step": 1444 }, { "epoch": 0.2124609446792869, "grad_norm": 0.5776660442352295, "learning_rate": 0.00010390651085141905, "loss": 1.1288, "step": 1445 }, { "epoch": 0.2126079764749127, "grad_norm": 0.3446873426437378, "learning_rate": 0.00010383973288814692, "loss": 1.778, "step": 1446 }, { "epoch": 0.2127550082705385, "grad_norm": 0.5131317973136902, "learning_rate": 0.0001037729549248748, "loss": 1.448, "step": 1447 }, { "epoch": 0.2129020400661643, "grad_norm": 0.39918696880340576, "learning_rate": 0.00010370617696160268, "loss": 1.5529, "step": 1448 }, { "epoch": 0.21304907186179012, "grad_norm": 0.6298250555992126, "learning_rate": 0.00010363939899833055, "loss": 1.2116, "step": 1449 }, { "epoch": 0.21319610365741593, "grad_norm": 0.6303383708000183, "learning_rate": 0.00010357262103505843, "loss": 1.2543, "step": 1450 }, { "epoch": 0.21334313545304173, "grad_norm": 0.5127140879631042, "learning_rate": 0.00010350584307178633, "loss": 1.2179, "step": 1451 }, { "epoch": 0.2134901672486675, "grad_norm": 0.5720611810684204, "learning_rate": 0.0001034390651085142, "loss": 1.0249, "step": 1452 }, { "epoch": 0.21363719904429332, "grad_norm": 0.5802286863327026, "learning_rate": 0.00010337228714524207, "loss": 1.3544, "step": 1453 }, { "epoch": 0.21378423083991913, "grad_norm": 0.49731141328811646, "learning_rate": 0.00010330550918196994, "loss": 1.2371, "step": 1454 }, { "epoch": 0.21393126263554493, "grad_norm": 0.5947168469429016, "learning_rate": 0.00010323873121869783, "loss": 1.1551, "step": 1455 }, { "epoch": 0.21407829443117074, "grad_norm": 0.4830482304096222, "learning_rate": 0.00010317195325542572, "loss": 1.1125, "step": 1456 }, { "epoch": 0.21422532622679655, "grad_norm": 0.5110485553741455, "learning_rate": 0.0001031051752921536, "loss": 1.5625, "step": 1457 }, { "epoch": 0.21437235802242235, "grad_norm": 0.5018860697746277, "learning_rate": 0.00010303839732888148, "loss": 1.1129, "step": 1458 }, { "epoch": 0.21451938981804816, "grad_norm": 0.6442530155181885, "learning_rate": 0.00010297161936560935, "loss": 1.1443, "step": 1459 }, { "epoch": 0.21466642161367397, "grad_norm": 0.5169264078140259, "learning_rate": 0.00010290484140233722, "loss": 1.1974, "step": 1460 }, { "epoch": 0.21481345340929975, "grad_norm": 0.5404734015464783, "learning_rate": 0.00010283806343906512, "loss": 1.2239, "step": 1461 }, { "epoch": 0.21496048520492556, "grad_norm": 0.6222432255744934, "learning_rate": 0.000102771285475793, "loss": 1.1641, "step": 1462 }, { "epoch": 0.21510751700055136, "grad_norm": 0.6306103467941284, "learning_rate": 0.00010270450751252087, "loss": 1.1869, "step": 1463 }, { "epoch": 0.21525454879617717, "grad_norm": 0.44758862257003784, "learning_rate": 0.00010263772954924876, "loss": 1.2286, "step": 1464 }, { "epoch": 0.21540158059180298, "grad_norm": 0.6180806756019592, "learning_rate": 0.00010257095158597663, "loss": 0.971, "step": 1465 }, { "epoch": 0.21554861238742878, "grad_norm": 0.5438312292098999, "learning_rate": 0.0001025041736227045, "loss": 1.5828, "step": 1466 }, { "epoch": 0.2156956441830546, "grad_norm": 1.0979804992675781, "learning_rate": 0.0001024373956594324, "loss": 0.976, "step": 1467 }, { "epoch": 0.2158426759786804, "grad_norm": 0.6312013268470764, "learning_rate": 0.00010237061769616028, "loss": 1.5322, "step": 1468 }, { "epoch": 0.2159897077743062, "grad_norm": 0.571880042552948, "learning_rate": 0.00010230383973288815, "loss": 1.2013, "step": 1469 }, { "epoch": 0.21613673956993198, "grad_norm": 0.7522807121276855, "learning_rate": 0.00010223706176961602, "loss": 1.125, "step": 1470 }, { "epoch": 0.2162837713655578, "grad_norm": 0.5555011630058289, "learning_rate": 0.0001021702838063439, "loss": 1.3202, "step": 1471 }, { "epoch": 0.2164308031611836, "grad_norm": 0.4390304982662201, "learning_rate": 0.0001021035058430718, "loss": 1.208, "step": 1472 }, { "epoch": 0.2165778349568094, "grad_norm": 0.4774854779243469, "learning_rate": 0.00010203672787979967, "loss": 1.381, "step": 1473 }, { "epoch": 0.2167248667524352, "grad_norm": 0.5723181962966919, "learning_rate": 0.00010196994991652756, "loss": 1.4832, "step": 1474 }, { "epoch": 0.21687189854806102, "grad_norm": 0.6024545431137085, "learning_rate": 0.00010190317195325543, "loss": 1.4042, "step": 1475 }, { "epoch": 0.21701893034368683, "grad_norm": 0.5169479250907898, "learning_rate": 0.0001018363939899833, "loss": 1.2394, "step": 1476 }, { "epoch": 0.21716596213931263, "grad_norm": 0.5577511787414551, "learning_rate": 0.0001017696160267112, "loss": 1.3572, "step": 1477 }, { "epoch": 0.21731299393493844, "grad_norm": 0.5325562357902527, "learning_rate": 0.00010170283806343908, "loss": 1.3265, "step": 1478 }, { "epoch": 0.21746002573056422, "grad_norm": 0.45983731746673584, "learning_rate": 0.00010163606010016695, "loss": 1.5578, "step": 1479 }, { "epoch": 0.21760705752619003, "grad_norm": 0.7266664505004883, "learning_rate": 0.00010156928213689482, "loss": 0.8865, "step": 1480 }, { "epoch": 0.21775408932181584, "grad_norm": 0.6747735142707825, "learning_rate": 0.00010150250417362271, "loss": 1.5554, "step": 1481 }, { "epoch": 0.21790112111744164, "grad_norm": 0.4483897387981415, "learning_rate": 0.00010143572621035058, "loss": 1.4753, "step": 1482 }, { "epoch": 0.21804815291306745, "grad_norm": 0.5380368232727051, "learning_rate": 0.00010136894824707848, "loss": 1.3096, "step": 1483 }, { "epoch": 0.21819518470869326, "grad_norm": 0.5236117243766785, "learning_rate": 0.00010130217028380636, "loss": 1.1872, "step": 1484 }, { "epoch": 0.21834221650431906, "grad_norm": 0.48784780502319336, "learning_rate": 0.00010123539232053423, "loss": 1.2038, "step": 1485 }, { "epoch": 0.21848924829994487, "grad_norm": 0.4815306067466736, "learning_rate": 0.0001011686143572621, "loss": 1.7458, "step": 1486 }, { "epoch": 0.21863628009557068, "grad_norm": 0.5593538284301758, "learning_rate": 0.00010110183639398998, "loss": 1.2355, "step": 1487 }, { "epoch": 0.21878331189119646, "grad_norm": 0.6971772313117981, "learning_rate": 0.00010103505843071788, "loss": 1.0682, "step": 1488 }, { "epoch": 0.21893034368682227, "grad_norm": 0.5186421871185303, "learning_rate": 0.00010096828046744575, "loss": 1.375, "step": 1489 }, { "epoch": 0.21907737548244807, "grad_norm": 0.5356982350349426, "learning_rate": 0.00010090150250417362, "loss": 1.3698, "step": 1490 }, { "epoch": 0.21922440727807388, "grad_norm": 0.534366250038147, "learning_rate": 0.00010083472454090151, "loss": 1.1856, "step": 1491 }, { "epoch": 0.2193714390736997, "grad_norm": 0.5797879695892334, "learning_rate": 0.00010076794657762938, "loss": 1.2255, "step": 1492 }, { "epoch": 0.2195184708693255, "grad_norm": 0.5341479182243347, "learning_rate": 0.00010070116861435728, "loss": 1.1468, "step": 1493 }, { "epoch": 0.2196655026649513, "grad_norm": 0.5192393660545349, "learning_rate": 0.00010063439065108516, "loss": 1.0476, "step": 1494 }, { "epoch": 0.2198125344605771, "grad_norm": 0.6609951257705688, "learning_rate": 0.00010056761268781303, "loss": 1.1652, "step": 1495 }, { "epoch": 0.21995956625620292, "grad_norm": 0.4722447395324707, "learning_rate": 0.0001005008347245409, "loss": 0.8527, "step": 1496 }, { "epoch": 0.2201065980518287, "grad_norm": 0.5188907384872437, "learning_rate": 0.00010043405676126878, "loss": 0.9645, "step": 1497 }, { "epoch": 0.2202536298474545, "grad_norm": 0.5876390933990479, "learning_rate": 0.00010036727879799666, "loss": 1.0214, "step": 1498 }, { "epoch": 0.2204006616430803, "grad_norm": 0.45582205057144165, "learning_rate": 0.00010030050083472455, "loss": 1.3508, "step": 1499 }, { "epoch": 0.22054769343870612, "grad_norm": 0.6699544787406921, "learning_rate": 0.00010023372287145244, "loss": 1.0011, "step": 1500 }, { "epoch": 0.22069472523433192, "grad_norm": 0.5445151329040527, "learning_rate": 0.00010016694490818031, "loss": 1.1928, "step": 1501 }, { "epoch": 0.22084175702995773, "grad_norm": 0.6422536373138428, "learning_rate": 0.00010010016694490818, "loss": 1.1152, "step": 1502 }, { "epoch": 0.22098878882558354, "grad_norm": 0.6705479025840759, "learning_rate": 0.00010003338898163605, "loss": 1.1119, "step": 1503 }, { "epoch": 0.22113582062120934, "grad_norm": 0.6382769346237183, "learning_rate": 9.996661101836394e-05, "loss": 1.5169, "step": 1504 }, { "epoch": 0.22128285241683515, "grad_norm": 0.5958567261695862, "learning_rate": 9.989983305509183e-05, "loss": 1.2388, "step": 1505 }, { "epoch": 0.22142988421246093, "grad_norm": 0.4882057309150696, "learning_rate": 9.98330550918197e-05, "loss": 1.2652, "step": 1506 }, { "epoch": 0.22157691600808674, "grad_norm": 0.5526162385940552, "learning_rate": 9.976627712854757e-05, "loss": 1.4889, "step": 1507 }, { "epoch": 0.22172394780371255, "grad_norm": 0.7655798196792603, "learning_rate": 9.969949916527546e-05, "loss": 1.2295, "step": 1508 }, { "epoch": 0.22187097959933835, "grad_norm": 0.6225388050079346, "learning_rate": 9.963272120200335e-05, "loss": 1.3307, "step": 1509 }, { "epoch": 0.22201801139496416, "grad_norm": 0.4264012277126312, "learning_rate": 9.956594323873122e-05, "loss": 1.5621, "step": 1510 }, { "epoch": 0.22216504319058997, "grad_norm": 0.6170988082885742, "learning_rate": 9.949916527545911e-05, "loss": 1.0422, "step": 1511 }, { "epoch": 0.22231207498621577, "grad_norm": 0.5800219178199768, "learning_rate": 9.943238731218698e-05, "loss": 1.5355, "step": 1512 }, { "epoch": 0.22245910678184158, "grad_norm": 0.8943714499473572, "learning_rate": 9.936560934891487e-05, "loss": 1.1849, "step": 1513 }, { "epoch": 0.2226061385774674, "grad_norm": 0.5949586033821106, "learning_rate": 9.929883138564274e-05, "loss": 1.3829, "step": 1514 }, { "epoch": 0.22275317037309317, "grad_norm": 0.7422260642051697, "learning_rate": 9.923205342237061e-05, "loss": 1.3405, "step": 1515 }, { "epoch": 0.22290020216871897, "grad_norm": 0.5302927494049072, "learning_rate": 9.91652754590985e-05, "loss": 0.9903, "step": 1516 }, { "epoch": 0.22304723396434478, "grad_norm": 0.6824787259101868, "learning_rate": 9.909849749582639e-05, "loss": 1.0799, "step": 1517 }, { "epoch": 0.2231942657599706, "grad_norm": 0.5038504004478455, "learning_rate": 9.903171953255426e-05, "loss": 1.8979, "step": 1518 }, { "epoch": 0.2233412975555964, "grad_norm": 0.5620954036712646, "learning_rate": 9.896494156928215e-05, "loss": 1.0883, "step": 1519 }, { "epoch": 0.2234883293512222, "grad_norm": 0.4869268834590912, "learning_rate": 9.889816360601002e-05, "loss": 1.4357, "step": 1520 }, { "epoch": 0.223635361146848, "grad_norm": 0.44320037961006165, "learning_rate": 9.883138564273791e-05, "loss": 1.8282, "step": 1521 }, { "epoch": 0.22378239294247382, "grad_norm": 0.5815877914428711, "learning_rate": 9.876460767946578e-05, "loss": 1.1636, "step": 1522 }, { "epoch": 0.22392942473809963, "grad_norm": 0.5955780744552612, "learning_rate": 9.869782971619365e-05, "loss": 0.8358, "step": 1523 }, { "epoch": 0.2240764565337254, "grad_norm": 0.6086217164993286, "learning_rate": 9.863105175292154e-05, "loss": 1.1487, "step": 1524 }, { "epoch": 0.2242234883293512, "grad_norm": 0.7026903033256531, "learning_rate": 9.856427378964941e-05, "loss": 0.9554, "step": 1525 }, { "epoch": 0.22437052012497702, "grad_norm": 0.4932984411716461, "learning_rate": 9.84974958263773e-05, "loss": 1.3818, "step": 1526 }, { "epoch": 0.22451755192060283, "grad_norm": 0.4403376877307892, "learning_rate": 9.843071786310519e-05, "loss": 1.8058, "step": 1527 }, { "epoch": 0.22466458371622863, "grad_norm": 0.4703921377658844, "learning_rate": 9.836393989983306e-05, "loss": 0.9215, "step": 1528 }, { "epoch": 0.22481161551185444, "grad_norm": 0.47163906693458557, "learning_rate": 9.829716193656095e-05, "loss": 1.2204, "step": 1529 }, { "epoch": 0.22495864730748025, "grad_norm": 0.7718981504440308, "learning_rate": 9.823038397328882e-05, "loss": 1.1704, "step": 1530 }, { "epoch": 0.22510567910310605, "grad_norm": 0.5518196821212769, "learning_rate": 9.816360601001669e-05, "loss": 1.2836, "step": 1531 }, { "epoch": 0.22525271089873186, "grad_norm": 0.6861315965652466, "learning_rate": 9.809682804674458e-05, "loss": 0.9095, "step": 1532 }, { "epoch": 0.22539974269435764, "grad_norm": 0.3890465795993805, "learning_rate": 9.803005008347245e-05, "loss": 1.3227, "step": 1533 }, { "epoch": 0.22554677448998345, "grad_norm": 0.4975080192089081, "learning_rate": 9.796327212020034e-05, "loss": 1.3868, "step": 1534 }, { "epoch": 0.22569380628560926, "grad_norm": 0.6168373227119446, "learning_rate": 9.789649415692823e-05, "loss": 1.2591, "step": 1535 }, { "epoch": 0.22584083808123506, "grad_norm": 0.5135233998298645, "learning_rate": 9.78297161936561e-05, "loss": 1.0415, "step": 1536 }, { "epoch": 0.22598786987686087, "grad_norm": 0.7084352374076843, "learning_rate": 9.776293823038399e-05, "loss": 1.0491, "step": 1537 }, { "epoch": 0.22613490167248668, "grad_norm": 0.6173841953277588, "learning_rate": 9.769616026711186e-05, "loss": 1.0001, "step": 1538 }, { "epoch": 0.22628193346811248, "grad_norm": 0.6403598189353943, "learning_rate": 9.762938230383973e-05, "loss": 1.165, "step": 1539 }, { "epoch": 0.2264289652637383, "grad_norm": 0.409586101770401, "learning_rate": 9.756260434056762e-05, "loss": 1.3847, "step": 1540 }, { "epoch": 0.2265759970593641, "grad_norm": 0.5137646198272705, "learning_rate": 9.749582637729549e-05, "loss": 1.2716, "step": 1541 }, { "epoch": 0.2267230288549899, "grad_norm": 0.4802088141441345, "learning_rate": 9.742904841402337e-05, "loss": 1.2132, "step": 1542 }, { "epoch": 0.22687006065061568, "grad_norm": 0.5435311794281006, "learning_rate": 9.736227045075125e-05, "loss": 1.2759, "step": 1543 }, { "epoch": 0.2270170924462415, "grad_norm": 0.7979618310928345, "learning_rate": 9.729549248747914e-05, "loss": 1.1581, "step": 1544 }, { "epoch": 0.2271641242418673, "grad_norm": 0.6477878093719482, "learning_rate": 9.722871452420703e-05, "loss": 1.072, "step": 1545 }, { "epoch": 0.2273111560374931, "grad_norm": 0.5740229487419128, "learning_rate": 9.71619365609349e-05, "loss": 1.0303, "step": 1546 }, { "epoch": 0.2274581878331189, "grad_norm": 0.42779847979545593, "learning_rate": 9.709515859766277e-05, "loss": 1.2809, "step": 1547 }, { "epoch": 0.22760521962874472, "grad_norm": 0.7799987196922302, "learning_rate": 9.702838063439066e-05, "loss": 1.2307, "step": 1548 }, { "epoch": 0.22775225142437053, "grad_norm": 0.42078036069869995, "learning_rate": 9.696160267111853e-05, "loss": 1.5487, "step": 1549 }, { "epoch": 0.22789928321999633, "grad_norm": 0.553041934967041, "learning_rate": 9.68948247078464e-05, "loss": 1.5089, "step": 1550 }, { "epoch": 0.22804631501562214, "grad_norm": 0.5207594037055969, "learning_rate": 9.682804674457429e-05, "loss": 1.5134, "step": 1551 }, { "epoch": 0.22819334681124792, "grad_norm": 0.5210331678390503, "learning_rate": 9.676126878130218e-05, "loss": 1.0434, "step": 1552 }, { "epoch": 0.22834037860687373, "grad_norm": 0.6796066164970398, "learning_rate": 9.669449081803006e-05, "loss": 1.0941, "step": 1553 }, { "epoch": 0.22848741040249954, "grad_norm": 0.4530833959579468, "learning_rate": 9.662771285475794e-05, "loss": 1.2483, "step": 1554 }, { "epoch": 0.22863444219812534, "grad_norm": 0.7332902550697327, "learning_rate": 9.656093489148581e-05, "loss": 0.8743, "step": 1555 }, { "epoch": 0.22878147399375115, "grad_norm": 0.5429344177246094, "learning_rate": 9.64941569282137e-05, "loss": 1.6047, "step": 1556 }, { "epoch": 0.22892850578937696, "grad_norm": 0.487344354391098, "learning_rate": 9.642737896494157e-05, "loss": 1.6215, "step": 1557 }, { "epoch": 0.22907553758500276, "grad_norm": 0.5374423861503601, "learning_rate": 9.636060100166944e-05, "loss": 1.2506, "step": 1558 }, { "epoch": 0.22922256938062857, "grad_norm": 0.6167294979095459, "learning_rate": 9.629382303839733e-05, "loss": 1.1114, "step": 1559 }, { "epoch": 0.22936960117625438, "grad_norm": 0.41463232040405273, "learning_rate": 9.62270450751252e-05, "loss": 1.208, "step": 1560 }, { "epoch": 0.22951663297188016, "grad_norm": 0.4805991053581238, "learning_rate": 9.616026711185309e-05, "loss": 1.3728, "step": 1561 }, { "epoch": 0.22966366476750597, "grad_norm": 0.5710384845733643, "learning_rate": 9.609348914858098e-05, "loss": 1.401, "step": 1562 }, { "epoch": 0.22981069656313177, "grad_norm": 0.5754150152206421, "learning_rate": 9.602671118530885e-05, "loss": 0.9986, "step": 1563 }, { "epoch": 0.22995772835875758, "grad_norm": 0.5330164432525635, "learning_rate": 9.595993322203674e-05, "loss": 1.2755, "step": 1564 }, { "epoch": 0.2301047601543834, "grad_norm": 0.5630112886428833, "learning_rate": 9.589315525876461e-05, "loss": 1.1257, "step": 1565 }, { "epoch": 0.2302517919500092, "grad_norm": 0.5001299381256104, "learning_rate": 9.582637729549248e-05, "loss": 1.4125, "step": 1566 }, { "epoch": 0.230398823745635, "grad_norm": 0.6047416925430298, "learning_rate": 9.575959933222037e-05, "loss": 1.151, "step": 1567 }, { "epoch": 0.2305458555412608, "grad_norm": 0.5558273196220398, "learning_rate": 9.569282136894824e-05, "loss": 1.3157, "step": 1568 }, { "epoch": 0.23069288733688662, "grad_norm": 0.6380701661109924, "learning_rate": 9.562604340567613e-05, "loss": 0.798, "step": 1569 }, { "epoch": 0.2308399191325124, "grad_norm": 0.5797339677810669, "learning_rate": 9.555926544240402e-05, "loss": 0.9906, "step": 1570 }, { "epoch": 0.2309869509281382, "grad_norm": 0.5026519894599915, "learning_rate": 9.549248747913189e-05, "loss": 1.7283, "step": 1571 }, { "epoch": 0.231133982723764, "grad_norm": 0.4569988548755646, "learning_rate": 9.542570951585978e-05, "loss": 1.3272, "step": 1572 }, { "epoch": 0.23128101451938982, "grad_norm": 0.5730646252632141, "learning_rate": 9.535893155258765e-05, "loss": 0.9397, "step": 1573 }, { "epoch": 0.23142804631501562, "grad_norm": 0.6399301886558533, "learning_rate": 9.529215358931554e-05, "loss": 1.1593, "step": 1574 }, { "epoch": 0.23157507811064143, "grad_norm": 0.5993507504463196, "learning_rate": 9.522537562604341e-05, "loss": 1.223, "step": 1575 }, { "epoch": 0.23172210990626724, "grad_norm": 0.6368708610534668, "learning_rate": 9.515859766277128e-05, "loss": 1.0492, "step": 1576 }, { "epoch": 0.23186914170189304, "grad_norm": 0.5560176372528076, "learning_rate": 9.509181969949917e-05, "loss": 1.3058, "step": 1577 }, { "epoch": 0.23201617349751885, "grad_norm": 0.6977183222770691, "learning_rate": 9.502504173622706e-05, "loss": 1.2342, "step": 1578 }, { "epoch": 0.23216320529314463, "grad_norm": 0.5898550748825073, "learning_rate": 9.495826377295493e-05, "loss": 1.1585, "step": 1579 }, { "epoch": 0.23231023708877044, "grad_norm": 0.4923187792301178, "learning_rate": 9.489148580968282e-05, "loss": 1.1715, "step": 1580 }, { "epoch": 0.23245726888439625, "grad_norm": 0.43065810203552246, "learning_rate": 9.482470784641069e-05, "loss": 1.1439, "step": 1581 }, { "epoch": 0.23260430068002205, "grad_norm": 0.6542065143585205, "learning_rate": 9.475792988313858e-05, "loss": 0.9522, "step": 1582 }, { "epoch": 0.23275133247564786, "grad_norm": 0.5739621520042419, "learning_rate": 9.469115191986645e-05, "loss": 1.1297, "step": 1583 }, { "epoch": 0.23289836427127367, "grad_norm": 0.5379124879837036, "learning_rate": 9.462437395659432e-05, "loss": 0.9137, "step": 1584 }, { "epoch": 0.23304539606689947, "grad_norm": 0.5685023069381714, "learning_rate": 9.455759599332221e-05, "loss": 1.3995, "step": 1585 }, { "epoch": 0.23319242786252528, "grad_norm": 0.5479350090026855, "learning_rate": 9.449081803005008e-05, "loss": 1.3898, "step": 1586 }, { "epoch": 0.2333394596581511, "grad_norm": 0.9416941404342651, "learning_rate": 9.442404006677797e-05, "loss": 1.1825, "step": 1587 }, { "epoch": 0.23348649145377687, "grad_norm": 0.625636100769043, "learning_rate": 9.435726210350586e-05, "loss": 1.2514, "step": 1588 }, { "epoch": 0.23363352324940267, "grad_norm": 0.6271191835403442, "learning_rate": 9.429048414023373e-05, "loss": 1.2348, "step": 1589 }, { "epoch": 0.23378055504502848, "grad_norm": 0.7510327696800232, "learning_rate": 9.422370617696162e-05, "loss": 1.0318, "step": 1590 }, { "epoch": 0.2339275868406543, "grad_norm": 0.7072024345397949, "learning_rate": 9.415692821368949e-05, "loss": 0.9552, "step": 1591 }, { "epoch": 0.2340746186362801, "grad_norm": 0.49493688344955444, "learning_rate": 9.409015025041736e-05, "loss": 0.9812, "step": 1592 }, { "epoch": 0.2342216504319059, "grad_norm": 0.5083461403846741, "learning_rate": 9.402337228714525e-05, "loss": 1.3654, "step": 1593 }, { "epoch": 0.2343686822275317, "grad_norm": 0.5150706171989441, "learning_rate": 9.395659432387312e-05, "loss": 1.898, "step": 1594 }, { "epoch": 0.23451571402315752, "grad_norm": 0.6049538850784302, "learning_rate": 9.388981636060101e-05, "loss": 1.0878, "step": 1595 }, { "epoch": 0.23466274581878332, "grad_norm": 0.520978569984436, "learning_rate": 9.38230383973289e-05, "loss": 1.4134, "step": 1596 }, { "epoch": 0.2348097776144091, "grad_norm": 0.46890705823898315, "learning_rate": 9.375626043405677e-05, "loss": 0.9929, "step": 1597 }, { "epoch": 0.2349568094100349, "grad_norm": 0.5594738721847534, "learning_rate": 9.368948247078465e-05, "loss": 1.0842, "step": 1598 }, { "epoch": 0.23510384120566072, "grad_norm": 0.5777500867843628, "learning_rate": 9.362270450751253e-05, "loss": 1.2479, "step": 1599 }, { "epoch": 0.23525087300128653, "grad_norm": 0.6888443231582642, "learning_rate": 9.35559265442404e-05, "loss": 1.2286, "step": 1600 }, { "epoch": 0.23539790479691233, "grad_norm": 0.47768843173980713, "learning_rate": 9.348914858096829e-05, "loss": 1.3158, "step": 1601 }, { "epoch": 0.23554493659253814, "grad_norm": 0.6783496737480164, "learning_rate": 9.342237061769616e-05, "loss": 1.1878, "step": 1602 }, { "epoch": 0.23569196838816395, "grad_norm": 0.5440490245819092, "learning_rate": 9.335559265442403e-05, "loss": 0.8093, "step": 1603 }, { "epoch": 0.23583900018378975, "grad_norm": 0.43675413727760315, "learning_rate": 9.328881469115192e-05, "loss": 1.7033, "step": 1604 }, { "epoch": 0.23598603197941556, "grad_norm": 0.5121459364891052, "learning_rate": 9.322203672787981e-05, "loss": 1.2469, "step": 1605 }, { "epoch": 0.23613306377504134, "grad_norm": 0.4334491193294525, "learning_rate": 9.31552587646077e-05, "loss": 1.1134, "step": 1606 }, { "epoch": 0.23628009557066715, "grad_norm": 0.43863895535469055, "learning_rate": 9.308848080133557e-05, "loss": 1.8808, "step": 1607 }, { "epoch": 0.23642712736629296, "grad_norm": 0.6972218155860901, "learning_rate": 9.302170283806344e-05, "loss": 1.0348, "step": 1608 }, { "epoch": 0.23657415916191876, "grad_norm": 0.4624405801296234, "learning_rate": 9.295492487479133e-05, "loss": 1.3198, "step": 1609 }, { "epoch": 0.23672119095754457, "grad_norm": 0.6230312585830688, "learning_rate": 9.28881469115192e-05, "loss": 1.4127, "step": 1610 }, { "epoch": 0.23686822275317038, "grad_norm": 0.5790882110595703, "learning_rate": 9.282136894824707e-05, "loss": 1.2786, "step": 1611 }, { "epoch": 0.23701525454879618, "grad_norm": 0.4233240485191345, "learning_rate": 9.275459098497496e-05, "loss": 1.8517, "step": 1612 }, { "epoch": 0.237162286344422, "grad_norm": 0.5412300825119019, "learning_rate": 9.268781302170285e-05, "loss": 1.429, "step": 1613 }, { "epoch": 0.2373093181400478, "grad_norm": 0.6432968378067017, "learning_rate": 9.262103505843073e-05, "loss": 0.9079, "step": 1614 }, { "epoch": 0.23745634993567358, "grad_norm": 0.3766767084598541, "learning_rate": 9.255425709515861e-05, "loss": 1.1475, "step": 1615 }, { "epoch": 0.23760338173129938, "grad_norm": 0.6067112684249878, "learning_rate": 9.248747913188648e-05, "loss": 1.1446, "step": 1616 }, { "epoch": 0.2377504135269252, "grad_norm": 0.6048491597175598, "learning_rate": 9.242070116861437e-05, "loss": 1.1785, "step": 1617 }, { "epoch": 0.237897445322551, "grad_norm": 0.5706529021263123, "learning_rate": 9.235392320534224e-05, "loss": 1.0968, "step": 1618 }, { "epoch": 0.2380444771181768, "grad_norm": 0.7215713262557983, "learning_rate": 9.228714524207011e-05, "loss": 1.1042, "step": 1619 }, { "epoch": 0.2381915089138026, "grad_norm": 0.5657112002372742, "learning_rate": 9.2220367278798e-05, "loss": 1.3303, "step": 1620 }, { "epoch": 0.23833854070942842, "grad_norm": 0.6674312949180603, "learning_rate": 9.215358931552587e-05, "loss": 1.4215, "step": 1621 }, { "epoch": 0.23848557250505423, "grad_norm": 0.5474651455879211, "learning_rate": 9.208681135225376e-05, "loss": 1.114, "step": 1622 }, { "epoch": 0.23863260430068003, "grad_norm": 0.48078134655952454, "learning_rate": 9.202003338898165e-05, "loss": 0.8805, "step": 1623 }, { "epoch": 0.23877963609630581, "grad_norm": 0.46697431802749634, "learning_rate": 9.195325542570952e-05, "loss": 1.3561, "step": 1624 }, { "epoch": 0.23892666789193162, "grad_norm": 0.5205184817314148, "learning_rate": 9.18864774624374e-05, "loss": 1.3606, "step": 1625 }, { "epoch": 0.23907369968755743, "grad_norm": 0.42666003108024597, "learning_rate": 9.181969949916528e-05, "loss": 1.7079, "step": 1626 }, { "epoch": 0.23922073148318324, "grad_norm": 0.5831342935562134, "learning_rate": 9.175292153589315e-05, "loss": 1.2438, "step": 1627 }, { "epoch": 0.23936776327880904, "grad_norm": 0.5047021508216858, "learning_rate": 9.168614357262104e-05, "loss": 1.7536, "step": 1628 }, { "epoch": 0.23951479507443485, "grad_norm": 0.4610256850719452, "learning_rate": 9.161936560934891e-05, "loss": 1.4942, "step": 1629 }, { "epoch": 0.23966182687006066, "grad_norm": 0.5719903707504272, "learning_rate": 9.15525876460768e-05, "loss": 1.2388, "step": 1630 }, { "epoch": 0.23980885866568646, "grad_norm": 0.7543137073516846, "learning_rate": 9.148580968280469e-05, "loss": 1.0657, "step": 1631 }, { "epoch": 0.23995589046131227, "grad_norm": 0.7102645635604858, "learning_rate": 9.141903171953256e-05, "loss": 1.1561, "step": 1632 }, { "epoch": 0.24010292225693805, "grad_norm": 0.4386819005012512, "learning_rate": 9.135225375626045e-05, "loss": 1.1101, "step": 1633 }, { "epoch": 0.24024995405256386, "grad_norm": 0.6141283512115479, "learning_rate": 9.128547579298832e-05, "loss": 0.9623, "step": 1634 }, { "epoch": 0.24039698584818966, "grad_norm": 0.4392020106315613, "learning_rate": 9.121869782971619e-05, "loss": 1.5823, "step": 1635 }, { "epoch": 0.24054401764381547, "grad_norm": 0.5040257573127747, "learning_rate": 9.115191986644408e-05, "loss": 0.8417, "step": 1636 }, { "epoch": 0.24069104943944128, "grad_norm": 0.6455694437026978, "learning_rate": 9.108514190317195e-05, "loss": 1.2555, "step": 1637 }, { "epoch": 0.2408380812350671, "grad_norm": 0.5364224910736084, "learning_rate": 9.101836393989984e-05, "loss": 1.378, "step": 1638 }, { "epoch": 0.2409851130306929, "grad_norm": 0.5365833044052124, "learning_rate": 9.095158597662771e-05, "loss": 1.2384, "step": 1639 }, { "epoch": 0.2411321448263187, "grad_norm": 0.5463657379150391, "learning_rate": 9.08848080133556e-05, "loss": 1.5997, "step": 1640 }, { "epoch": 0.2412791766219445, "grad_norm": 0.44339752197265625, "learning_rate": 9.081803005008348e-05, "loss": 1.5823, "step": 1641 }, { "epoch": 0.2414262084175703, "grad_norm": 0.5852905511856079, "learning_rate": 9.075125208681136e-05, "loss": 1.4596, "step": 1642 }, { "epoch": 0.2415732402131961, "grad_norm": 0.4833205044269562, "learning_rate": 9.068447412353923e-05, "loss": 1.5554, "step": 1643 }, { "epoch": 0.2417202720088219, "grad_norm": 0.46025893092155457, "learning_rate": 9.061769616026712e-05, "loss": 1.2965, "step": 1644 }, { "epoch": 0.2418673038044477, "grad_norm": 0.6678174138069153, "learning_rate": 9.055091819699499e-05, "loss": 1.199, "step": 1645 }, { "epoch": 0.24201433560007352, "grad_norm": 0.49132829904556274, "learning_rate": 9.048414023372288e-05, "loss": 1.0, "step": 1646 }, { "epoch": 0.24216136739569932, "grad_norm": 0.47107452154159546, "learning_rate": 9.041736227045075e-05, "loss": 1.5837, "step": 1647 }, { "epoch": 0.24230839919132513, "grad_norm": 0.6973981261253357, "learning_rate": 9.035058430717864e-05, "loss": 0.9913, "step": 1648 }, { "epoch": 0.24245543098695094, "grad_norm": 0.4977231025695801, "learning_rate": 9.028380634390652e-05, "loss": 1.2631, "step": 1649 }, { "epoch": 0.24260246278257674, "grad_norm": 0.6652663350105286, "learning_rate": 9.02170283806344e-05, "loss": 1.1995, "step": 1650 }, { "epoch": 0.24274949457820252, "grad_norm": 0.6486725211143494, "learning_rate": 9.015025041736227e-05, "loss": 1.4177, "step": 1651 }, { "epoch": 0.24289652637382833, "grad_norm": 0.6155788898468018, "learning_rate": 9.008347245409016e-05, "loss": 1.1471, "step": 1652 }, { "epoch": 0.24304355816945414, "grad_norm": 0.5114461779594421, "learning_rate": 9.001669449081803e-05, "loss": 1.2175, "step": 1653 }, { "epoch": 0.24319058996507995, "grad_norm": 0.605247437953949, "learning_rate": 8.994991652754592e-05, "loss": 0.8534, "step": 1654 }, { "epoch": 0.24333762176070575, "grad_norm": 0.4602752923965454, "learning_rate": 8.988313856427379e-05, "loss": 1.4223, "step": 1655 }, { "epoch": 0.24348465355633156, "grad_norm": 0.678560733795166, "learning_rate": 8.981636060100166e-05, "loss": 0.9716, "step": 1656 }, { "epoch": 0.24363168535195737, "grad_norm": 0.6179150342941284, "learning_rate": 8.974958263772955e-05, "loss": 1.5719, "step": 1657 }, { "epoch": 0.24377871714758317, "grad_norm": 0.5787912607192993, "learning_rate": 8.968280467445744e-05, "loss": 0.9868, "step": 1658 }, { "epoch": 0.24392574894320898, "grad_norm": 0.490427702665329, "learning_rate": 8.961602671118531e-05, "loss": 1.2411, "step": 1659 }, { "epoch": 0.24407278073883476, "grad_norm": 0.47056275606155396, "learning_rate": 8.95492487479132e-05, "loss": 1.2248, "step": 1660 }, { "epoch": 0.24421981253446057, "grad_norm": 0.5339208841323853, "learning_rate": 8.948247078464107e-05, "loss": 1.2891, "step": 1661 }, { "epoch": 0.24436684433008637, "grad_norm": 0.616615355014801, "learning_rate": 8.941569282136896e-05, "loss": 1.1065, "step": 1662 }, { "epoch": 0.24451387612571218, "grad_norm": 0.5172422528266907, "learning_rate": 8.934891485809683e-05, "loss": 1.2772, "step": 1663 }, { "epoch": 0.244660907921338, "grad_norm": 0.5793017745018005, "learning_rate": 8.92821368948247e-05, "loss": 0.9983, "step": 1664 }, { "epoch": 0.2448079397169638, "grad_norm": 0.45165905356407166, "learning_rate": 8.921535893155259e-05, "loss": 1.3652, "step": 1665 }, { "epoch": 0.2449549715125896, "grad_norm": 0.5071299076080322, "learning_rate": 8.914858096828048e-05, "loss": 1.4648, "step": 1666 }, { "epoch": 0.2451020033082154, "grad_norm": 0.5808746814727783, "learning_rate": 8.908180300500835e-05, "loss": 1.665, "step": 1667 }, { "epoch": 0.24524903510384122, "grad_norm": 0.49585914611816406, "learning_rate": 8.901502504173624e-05, "loss": 1.5984, "step": 1668 }, { "epoch": 0.245396066899467, "grad_norm": 0.6790951490402222, "learning_rate": 8.894824707846411e-05, "loss": 0.9743, "step": 1669 }, { "epoch": 0.2455430986950928, "grad_norm": 0.7088842391967773, "learning_rate": 8.8881469115192e-05, "loss": 1.0242, "step": 1670 }, { "epoch": 0.2456901304907186, "grad_norm": 0.6326477527618408, "learning_rate": 8.881469115191987e-05, "loss": 0.9755, "step": 1671 }, { "epoch": 0.24583716228634442, "grad_norm": 0.662755012512207, "learning_rate": 8.874791318864774e-05, "loss": 0.9729, "step": 1672 }, { "epoch": 0.24598419408197023, "grad_norm": 0.7197674512863159, "learning_rate": 8.868113522537563e-05, "loss": 0.9007, "step": 1673 }, { "epoch": 0.24613122587759603, "grad_norm": 0.4984396696090698, "learning_rate": 8.86143572621035e-05, "loss": 1.1645, "step": 1674 }, { "epoch": 0.24627825767322184, "grad_norm": 0.6155694723129272, "learning_rate": 8.854757929883139e-05, "loss": 1.3399, "step": 1675 }, { "epoch": 0.24642528946884765, "grad_norm": 0.5292329788208008, "learning_rate": 8.848080133555928e-05, "loss": 1.3137, "step": 1676 }, { "epoch": 0.24657232126447345, "grad_norm": 0.48193439841270447, "learning_rate": 8.841402337228715e-05, "loss": 1.147, "step": 1677 }, { "epoch": 0.24671935306009923, "grad_norm": 0.6016201972961426, "learning_rate": 8.834724540901504e-05, "loss": 0.9902, "step": 1678 }, { "epoch": 0.24686638485572504, "grad_norm": 0.3828026056289673, "learning_rate": 8.828046744574291e-05, "loss": 1.6211, "step": 1679 }, { "epoch": 0.24701341665135085, "grad_norm": 0.5384033918380737, "learning_rate": 8.821368948247078e-05, "loss": 1.1496, "step": 1680 }, { "epoch": 0.24716044844697665, "grad_norm": 0.5264310836791992, "learning_rate": 8.814691151919867e-05, "loss": 1.199, "step": 1681 }, { "epoch": 0.24730748024260246, "grad_norm": 0.43435773253440857, "learning_rate": 8.808013355592654e-05, "loss": 1.3269, "step": 1682 }, { "epoch": 0.24745451203822827, "grad_norm": 0.517509400844574, "learning_rate": 8.801335559265443e-05, "loss": 0.8295, "step": 1683 }, { "epoch": 0.24760154383385408, "grad_norm": 0.5610449910163879, "learning_rate": 8.794657762938232e-05, "loss": 1.6387, "step": 1684 }, { "epoch": 0.24774857562947988, "grad_norm": 0.6858127117156982, "learning_rate": 8.787979966611019e-05, "loss": 1.1126, "step": 1685 }, { "epoch": 0.2478956074251057, "grad_norm": 0.536293089389801, "learning_rate": 8.781302170283808e-05, "loss": 1.1935, "step": 1686 }, { "epoch": 0.24804263922073147, "grad_norm": 0.5378962755203247, "learning_rate": 8.774624373956595e-05, "loss": 1.3029, "step": 1687 }, { "epoch": 0.24818967101635728, "grad_norm": 0.5790378451347351, "learning_rate": 8.767946577629382e-05, "loss": 1.2891, "step": 1688 }, { "epoch": 0.24833670281198308, "grad_norm": 0.5398858785629272, "learning_rate": 8.761268781302171e-05, "loss": 1.3818, "step": 1689 }, { "epoch": 0.2484837346076089, "grad_norm": 0.5895360708236694, "learning_rate": 8.754590984974958e-05, "loss": 1.2354, "step": 1690 }, { "epoch": 0.2486307664032347, "grad_norm": 0.5782397985458374, "learning_rate": 8.747913188647745e-05, "loss": 1.2859, "step": 1691 }, { "epoch": 0.2487777981988605, "grad_norm": 0.7079206705093384, "learning_rate": 8.741235392320535e-05, "loss": 1.0043, "step": 1692 }, { "epoch": 0.2489248299944863, "grad_norm": 0.65590500831604, "learning_rate": 8.734557595993323e-05, "loss": 1.0928, "step": 1693 }, { "epoch": 0.24907186179011212, "grad_norm": 0.6019294857978821, "learning_rate": 8.727879799666111e-05, "loss": 1.1942, "step": 1694 }, { "epoch": 0.24921889358573793, "grad_norm": 0.6357347965240479, "learning_rate": 8.721202003338899e-05, "loss": 0.8711, "step": 1695 }, { "epoch": 0.2493659253813637, "grad_norm": 0.3839411735534668, "learning_rate": 8.714524207011686e-05, "loss": 1.1199, "step": 1696 }, { "epoch": 0.2495129571769895, "grad_norm": 0.6149453520774841, "learning_rate": 8.707846410684475e-05, "loss": 1.256, "step": 1697 }, { "epoch": 0.24965998897261532, "grad_norm": 0.4962863326072693, "learning_rate": 8.701168614357262e-05, "loss": 0.8514, "step": 1698 }, { "epoch": 0.24980702076824113, "grad_norm": 0.7369348406791687, "learning_rate": 8.694490818030051e-05, "loss": 1.0501, "step": 1699 }, { "epoch": 0.24995405256386694, "grad_norm": 0.4155783951282501, "learning_rate": 8.687813021702838e-05, "loss": 1.1216, "step": 1700 }, { "epoch": 0.25010108435949274, "grad_norm": 0.5694376230239868, "learning_rate": 8.681135225375627e-05, "loss": 1.2615, "step": 1701 }, { "epoch": 0.25024811615511855, "grad_norm": 0.684994101524353, "learning_rate": 8.674457429048415e-05, "loss": 1.115, "step": 1702 }, { "epoch": 0.25039514795074436, "grad_norm": 0.5550678968429565, "learning_rate": 8.667779632721203e-05, "loss": 1.5333, "step": 1703 }, { "epoch": 0.25054217974637016, "grad_norm": 0.4366767704486847, "learning_rate": 8.66110183639399e-05, "loss": 1.6128, "step": 1704 }, { "epoch": 0.25068921154199597, "grad_norm": 0.644633412361145, "learning_rate": 8.654424040066779e-05, "loss": 1.2076, "step": 1705 }, { "epoch": 0.2508362433376218, "grad_norm": 0.5669049620628357, "learning_rate": 8.647746243739566e-05, "loss": 1.3649, "step": 1706 }, { "epoch": 0.2509832751332476, "grad_norm": 0.5297914147377014, "learning_rate": 8.641068447412355e-05, "loss": 1.2484, "step": 1707 }, { "epoch": 0.2511303069288734, "grad_norm": 0.6258499026298523, "learning_rate": 8.634390651085142e-05, "loss": 1.0289, "step": 1708 }, { "epoch": 0.2512773387244992, "grad_norm": 0.4703880250453949, "learning_rate": 8.62771285475793e-05, "loss": 1.3036, "step": 1709 }, { "epoch": 0.25142437052012495, "grad_norm": 0.6473929286003113, "learning_rate": 8.62103505843072e-05, "loss": 1.3189, "step": 1710 }, { "epoch": 0.25157140231575076, "grad_norm": 0.4140447676181793, "learning_rate": 8.614357262103507e-05, "loss": 1.1293, "step": 1711 }, { "epoch": 0.25171843411137657, "grad_norm": 0.48286300897598267, "learning_rate": 8.607679465776294e-05, "loss": 1.2342, "step": 1712 }, { "epoch": 0.2518654659070024, "grad_norm": 0.6283698081970215, "learning_rate": 8.601001669449083e-05, "loss": 1.1357, "step": 1713 }, { "epoch": 0.2520124977026282, "grad_norm": 0.5845705270767212, "learning_rate": 8.59432387312187e-05, "loss": 0.9596, "step": 1714 }, { "epoch": 0.252159529498254, "grad_norm": 0.4531230330467224, "learning_rate": 8.587646076794659e-05, "loss": 1.945, "step": 1715 }, { "epoch": 0.2523065612938798, "grad_norm": 0.5337319374084473, "learning_rate": 8.580968280467446e-05, "loss": 1.2412, "step": 1716 }, { "epoch": 0.2524535930895056, "grad_norm": 0.6363638043403625, "learning_rate": 8.574290484140233e-05, "loss": 1.1818, "step": 1717 }, { "epoch": 0.2526006248851314, "grad_norm": 0.4370921552181244, "learning_rate": 8.567612687813022e-05, "loss": 1.1222, "step": 1718 }, { "epoch": 0.2527476566807572, "grad_norm": 0.5906668305397034, "learning_rate": 8.56093489148581e-05, "loss": 1.2164, "step": 1719 }, { "epoch": 0.252894688476383, "grad_norm": 0.4730553925037384, "learning_rate": 8.554257095158598e-05, "loss": 0.9878, "step": 1720 }, { "epoch": 0.25304172027200883, "grad_norm": 0.7032566666603088, "learning_rate": 8.547579298831387e-05, "loss": 1.172, "step": 1721 }, { "epoch": 0.25318875206763464, "grad_norm": 0.7880038022994995, "learning_rate": 8.540901502504174e-05, "loss": 1.0961, "step": 1722 }, { "epoch": 0.25333578386326044, "grad_norm": 0.49554726481437683, "learning_rate": 8.534223706176963e-05, "loss": 1.346, "step": 1723 }, { "epoch": 0.25348281565888625, "grad_norm": 0.7238802313804626, "learning_rate": 8.52754590984975e-05, "loss": 1.2065, "step": 1724 }, { "epoch": 0.25362984745451206, "grad_norm": 0.5044317841529846, "learning_rate": 8.520868113522537e-05, "loss": 1.209, "step": 1725 }, { "epoch": 0.25377687925013787, "grad_norm": 0.5443348288536072, "learning_rate": 8.514190317195326e-05, "loss": 1.2146, "step": 1726 }, { "epoch": 0.2539239110457637, "grad_norm": 0.5827182531356812, "learning_rate": 8.507512520868115e-05, "loss": 1.0879, "step": 1727 }, { "epoch": 0.2540709428413894, "grad_norm": 0.6123396754264832, "learning_rate": 8.500834724540902e-05, "loss": 1.0745, "step": 1728 }, { "epoch": 0.25421797463701523, "grad_norm": 0.6806741952896118, "learning_rate": 8.49415692821369e-05, "loss": 0.9064, "step": 1729 }, { "epoch": 0.25436500643264104, "grad_norm": 0.6087940335273743, "learning_rate": 8.487479131886478e-05, "loss": 1.1467, "step": 1730 }, { "epoch": 0.25451203822826685, "grad_norm": 0.5138906836509705, "learning_rate": 8.480801335559267e-05, "loss": 1.4396, "step": 1731 }, { "epoch": 0.25465907002389265, "grad_norm": 0.454508900642395, "learning_rate": 8.474123539232054e-05, "loss": 1.1651, "step": 1732 }, { "epoch": 0.25480610181951846, "grad_norm": 0.4846546947956085, "learning_rate": 8.467445742904841e-05, "loss": 1.2366, "step": 1733 }, { "epoch": 0.25495313361514427, "grad_norm": 0.7293670773506165, "learning_rate": 8.46076794657763e-05, "loss": 1.1622, "step": 1734 }, { "epoch": 0.2551001654107701, "grad_norm": 0.7266565561294556, "learning_rate": 8.454090150250417e-05, "loss": 1.3798, "step": 1735 }, { "epoch": 0.2552471972063959, "grad_norm": 0.5410826802253723, "learning_rate": 8.447412353923206e-05, "loss": 1.2473, "step": 1736 }, { "epoch": 0.2553942290020217, "grad_norm": 0.7238506078720093, "learning_rate": 8.440734557595994e-05, "loss": 1.3722, "step": 1737 }, { "epoch": 0.2555412607976475, "grad_norm": 0.5192293524742126, "learning_rate": 8.434056761268782e-05, "loss": 1.1568, "step": 1738 }, { "epoch": 0.2556882925932733, "grad_norm": 0.5774774551391602, "learning_rate": 8.42737896494157e-05, "loss": 1.3328, "step": 1739 }, { "epoch": 0.2558353243888991, "grad_norm": 0.45590630173683167, "learning_rate": 8.420701168614358e-05, "loss": 1.1128, "step": 1740 }, { "epoch": 0.2559823561845249, "grad_norm": 0.40741512179374695, "learning_rate": 8.414023372287145e-05, "loss": 1.3558, "step": 1741 }, { "epoch": 0.2561293879801507, "grad_norm": 0.5991970300674438, "learning_rate": 8.407345575959934e-05, "loss": 1.1375, "step": 1742 }, { "epoch": 0.25627641977577653, "grad_norm": 0.6191490888595581, "learning_rate": 8.400667779632721e-05, "loss": 1.2003, "step": 1743 }, { "epoch": 0.25642345157140234, "grad_norm": 0.540582001209259, "learning_rate": 8.39398998330551e-05, "loss": 1.125, "step": 1744 }, { "epoch": 0.25657048336702815, "grad_norm": 0.493932843208313, "learning_rate": 8.387312186978298e-05, "loss": 1.276, "step": 1745 }, { "epoch": 0.2567175151626539, "grad_norm": 0.41617459058761597, "learning_rate": 8.380634390651086e-05, "loss": 1.0527, "step": 1746 }, { "epoch": 0.2568645469582797, "grad_norm": 0.5976696014404297, "learning_rate": 8.373956594323874e-05, "loss": 1.1735, "step": 1747 }, { "epoch": 0.2570115787539055, "grad_norm": 0.5746784806251526, "learning_rate": 8.367278797996662e-05, "loss": 1.0275, "step": 1748 }, { "epoch": 0.2571586105495313, "grad_norm": 0.5638889074325562, "learning_rate": 8.360601001669449e-05, "loss": 0.9149, "step": 1749 }, { "epoch": 0.2573056423451571, "grad_norm": 0.4723840355873108, "learning_rate": 8.353923205342238e-05, "loss": 1.6787, "step": 1750 }, { "epoch": 0.25745267414078293, "grad_norm": 0.7996634244918823, "learning_rate": 8.347245409015025e-05, "loss": 1.2951, "step": 1751 }, { "epoch": 0.25759970593640874, "grad_norm": 0.5496968030929565, "learning_rate": 8.340567612687812e-05, "loss": 1.1921, "step": 1752 }, { "epoch": 0.25774673773203455, "grad_norm": 0.6232264637947083, "learning_rate": 8.333889816360601e-05, "loss": 1.3485, "step": 1753 }, { "epoch": 0.25789376952766035, "grad_norm": 0.6679679751396179, "learning_rate": 8.32721202003339e-05, "loss": 1.2345, "step": 1754 }, { "epoch": 0.25804080132328616, "grad_norm": 0.639143168926239, "learning_rate": 8.320534223706178e-05, "loss": 1.1297, "step": 1755 }, { "epoch": 0.25818783311891197, "grad_norm": 0.7377358675003052, "learning_rate": 8.313856427378966e-05, "loss": 1.0815, "step": 1756 }, { "epoch": 0.2583348649145378, "grad_norm": 0.7733796834945679, "learning_rate": 8.307178631051753e-05, "loss": 0.988, "step": 1757 }, { "epoch": 0.2584818967101636, "grad_norm": 0.43508240580558777, "learning_rate": 8.300500834724542e-05, "loss": 1.5615, "step": 1758 }, { "epoch": 0.2586289285057894, "grad_norm": 0.6107215881347656, "learning_rate": 8.293823038397329e-05, "loss": 1.1822, "step": 1759 }, { "epoch": 0.2587759603014152, "grad_norm": 0.6514309048652649, "learning_rate": 8.287145242070116e-05, "loss": 1.2916, "step": 1760 }, { "epoch": 0.258922992097041, "grad_norm": 0.49753686785697937, "learning_rate": 8.280467445742905e-05, "loss": 1.0675, "step": 1761 }, { "epoch": 0.2590700238926668, "grad_norm": 0.45176440477371216, "learning_rate": 8.273789649415694e-05, "loss": 1.3074, "step": 1762 }, { "epoch": 0.2592170556882926, "grad_norm": 0.793578028678894, "learning_rate": 8.267111853088482e-05, "loss": 1.0222, "step": 1763 }, { "epoch": 0.25936408748391837, "grad_norm": 0.5325242280960083, "learning_rate": 8.26043405676127e-05, "loss": 1.0392, "step": 1764 }, { "epoch": 0.2595111192795442, "grad_norm": 0.4184320271015167, "learning_rate": 8.253756260434057e-05, "loss": 1.1857, "step": 1765 }, { "epoch": 0.25965815107517, "grad_norm": 0.5738174319267273, "learning_rate": 8.247078464106846e-05, "loss": 1.2072, "step": 1766 }, { "epoch": 0.2598051828707958, "grad_norm": 0.6402509808540344, "learning_rate": 8.240400667779633e-05, "loss": 1.4482, "step": 1767 }, { "epoch": 0.2599522146664216, "grad_norm": 0.5725671052932739, "learning_rate": 8.23372287145242e-05, "loss": 1.2416, "step": 1768 }, { "epoch": 0.2600992464620474, "grad_norm": 0.5515746474266052, "learning_rate": 8.227045075125209e-05, "loss": 1.2183, "step": 1769 }, { "epoch": 0.2602462782576732, "grad_norm": 0.42489081621170044, "learning_rate": 8.220367278797996e-05, "loss": 1.1921, "step": 1770 }, { "epoch": 0.260393310053299, "grad_norm": 0.47918903827667236, "learning_rate": 8.213689482470785e-05, "loss": 1.4218, "step": 1771 }, { "epoch": 0.26054034184892483, "grad_norm": 0.45460909605026245, "learning_rate": 8.207011686143574e-05, "loss": 1.3586, "step": 1772 }, { "epoch": 0.26068737364455064, "grad_norm": 0.524882972240448, "learning_rate": 8.200333889816361e-05, "loss": 1.3288, "step": 1773 }, { "epoch": 0.26083440544017644, "grad_norm": 0.5847158432006836, "learning_rate": 8.19365609348915e-05, "loss": 0.986, "step": 1774 }, { "epoch": 0.26098143723580225, "grad_norm": 0.3756552040576935, "learning_rate": 8.186978297161937e-05, "loss": 1.7044, "step": 1775 }, { "epoch": 0.26112846903142806, "grad_norm": 0.6210364103317261, "learning_rate": 8.180300500834724e-05, "loss": 1.0175, "step": 1776 }, { "epoch": 0.26127550082705386, "grad_norm": 0.566368818283081, "learning_rate": 8.173622704507513e-05, "loss": 1.418, "step": 1777 }, { "epoch": 0.26142253262267967, "grad_norm": 0.4391399919986725, "learning_rate": 8.1669449081803e-05, "loss": 1.5665, "step": 1778 }, { "epoch": 0.2615695644183055, "grad_norm": 0.42997750639915466, "learning_rate": 8.160267111853089e-05, "loss": 1.3632, "step": 1779 }, { "epoch": 0.2617165962139313, "grad_norm": 0.605319619178772, "learning_rate": 8.153589315525877e-05, "loss": 1.1578, "step": 1780 }, { "epoch": 0.2618636280095571, "grad_norm": 0.35595643520355225, "learning_rate": 8.146911519198665e-05, "loss": 0.9144, "step": 1781 }, { "epoch": 0.26201065980518284, "grad_norm": 0.5646345019340515, "learning_rate": 8.140233722871453e-05, "loss": 1.1415, "step": 1782 }, { "epoch": 0.26215769160080865, "grad_norm": 0.5671446919441223, "learning_rate": 8.133555926544241e-05, "loss": 1.3412, "step": 1783 }, { "epoch": 0.26230472339643446, "grad_norm": 0.5904648303985596, "learning_rate": 8.126878130217028e-05, "loss": 1.0492, "step": 1784 }, { "epoch": 0.26245175519206027, "grad_norm": 0.5604920983314514, "learning_rate": 8.120200333889817e-05, "loss": 1.4834, "step": 1785 }, { "epoch": 0.2625987869876861, "grad_norm": 0.7538804411888123, "learning_rate": 8.113522537562604e-05, "loss": 1.2639, "step": 1786 }, { "epoch": 0.2627458187833119, "grad_norm": 0.5667065978050232, "learning_rate": 8.106844741235393e-05, "loss": 1.2099, "step": 1787 }, { "epoch": 0.2628928505789377, "grad_norm": 0.5092707276344299, "learning_rate": 8.10016694490818e-05, "loss": 1.2297, "step": 1788 }, { "epoch": 0.2630398823745635, "grad_norm": 0.6160154938697815, "learning_rate": 8.093489148580969e-05, "loss": 0.8745, "step": 1789 }, { "epoch": 0.2631869141701893, "grad_norm": 0.6165684461593628, "learning_rate": 8.086811352253757e-05, "loss": 1.3467, "step": 1790 }, { "epoch": 0.2633339459658151, "grad_norm": 0.4950697422027588, "learning_rate": 8.080133555926545e-05, "loss": 1.2175, "step": 1791 }, { "epoch": 0.2634809777614409, "grad_norm": 0.7090346813201904, "learning_rate": 8.073455759599332e-05, "loss": 1.122, "step": 1792 }, { "epoch": 0.2636280095570667, "grad_norm": 0.5060723423957825, "learning_rate": 8.066777963272121e-05, "loss": 1.1683, "step": 1793 }, { "epoch": 0.26377504135269253, "grad_norm": 0.5672757029533386, "learning_rate": 8.060100166944908e-05, "loss": 1.3001, "step": 1794 }, { "epoch": 0.26392207314831834, "grad_norm": 0.5090026259422302, "learning_rate": 8.053422370617697e-05, "loss": 1.2385, "step": 1795 }, { "epoch": 0.26406910494394414, "grad_norm": 0.5424351096153259, "learning_rate": 8.046744574290484e-05, "loss": 1.3927, "step": 1796 }, { "epoch": 0.26421613673956995, "grad_norm": 0.6113479137420654, "learning_rate": 8.040066777963273e-05, "loss": 0.8123, "step": 1797 }, { "epoch": 0.26436316853519576, "grad_norm": 0.5377811193466187, "learning_rate": 8.033388981636061e-05, "loss": 1.2419, "step": 1798 }, { "epoch": 0.26451020033082157, "grad_norm": 0.5767778158187866, "learning_rate": 8.026711185308849e-05, "loss": 0.9898, "step": 1799 }, { "epoch": 0.2646572321264473, "grad_norm": 0.511762797832489, "learning_rate": 8.020033388981636e-05, "loss": 1.2933, "step": 1800 }, { "epoch": 0.2648042639220731, "grad_norm": 0.42173701524734497, "learning_rate": 8.013355592654425e-05, "loss": 1.1249, "step": 1801 }, { "epoch": 0.26495129571769893, "grad_norm": 0.6184702515602112, "learning_rate": 8.006677796327212e-05, "loss": 1.0419, "step": 1802 }, { "epoch": 0.26509832751332474, "grad_norm": 0.5799165368080139, "learning_rate": 8e-05, "loss": 1.1465, "step": 1803 }, { "epoch": 0.26524535930895055, "grad_norm": 0.5278991460800171, "learning_rate": 7.993322203672788e-05, "loss": 1.3459, "step": 1804 }, { "epoch": 0.26539239110457635, "grad_norm": 0.5305517315864563, "learning_rate": 7.986644407345575e-05, "loss": 1.3524, "step": 1805 }, { "epoch": 0.26553942290020216, "grad_norm": 0.5597233176231384, "learning_rate": 7.979966611018364e-05, "loss": 1.0553, "step": 1806 }, { "epoch": 0.26568645469582797, "grad_norm": 0.5003093481063843, "learning_rate": 7.973288814691153e-05, "loss": 1.1868, "step": 1807 }, { "epoch": 0.2658334864914538, "grad_norm": 0.5653015971183777, "learning_rate": 7.96661101836394e-05, "loss": 1.2072, "step": 1808 }, { "epoch": 0.2659805182870796, "grad_norm": 0.49464067816734314, "learning_rate": 7.959933222036729e-05, "loss": 1.2739, "step": 1809 }, { "epoch": 0.2661275500827054, "grad_norm": 0.7131492495536804, "learning_rate": 7.953255425709516e-05, "loss": 1.0212, "step": 1810 }, { "epoch": 0.2662745818783312, "grad_norm": 0.46528223156929016, "learning_rate": 7.946577629382305e-05, "loss": 1.2491, "step": 1811 }, { "epoch": 0.266421613673957, "grad_norm": 0.505845844745636, "learning_rate": 7.939899833055092e-05, "loss": 1.1362, "step": 1812 }, { "epoch": 0.2665686454695828, "grad_norm": 0.6805452704429626, "learning_rate": 7.933222036727879e-05, "loss": 1.3784, "step": 1813 }, { "epoch": 0.2667156772652086, "grad_norm": 0.5717839598655701, "learning_rate": 7.926544240400668e-05, "loss": 1.1917, "step": 1814 }, { "epoch": 0.2668627090608344, "grad_norm": 0.6492916345596313, "learning_rate": 7.919866444073457e-05, "loss": 1.0117, "step": 1815 }, { "epoch": 0.26700974085646023, "grad_norm": 0.48782336711883545, "learning_rate": 7.913188647746244e-05, "loss": 1.7532, "step": 1816 }, { "epoch": 0.26715677265208604, "grad_norm": 0.42241623997688293, "learning_rate": 7.906510851419033e-05, "loss": 1.7461, "step": 1817 }, { "epoch": 0.2673038044477118, "grad_norm": 0.5505175590515137, "learning_rate": 7.89983305509182e-05, "loss": 1.0945, "step": 1818 }, { "epoch": 0.2674508362433376, "grad_norm": 0.4956428110599518, "learning_rate": 7.893155258764609e-05, "loss": 1.2231, "step": 1819 }, { "epoch": 0.2675978680389634, "grad_norm": 0.5840120315551758, "learning_rate": 7.886477462437396e-05, "loss": 1.1052, "step": 1820 }, { "epoch": 0.2677448998345892, "grad_norm": 0.7726488709449768, "learning_rate": 7.879799666110183e-05, "loss": 1.0764, "step": 1821 }, { "epoch": 0.267891931630215, "grad_norm": 0.5668435096740723, "learning_rate": 7.873121869782972e-05, "loss": 0.9085, "step": 1822 }, { "epoch": 0.2680389634258408, "grad_norm": 0.6894591450691223, "learning_rate": 7.86644407345576e-05, "loss": 0.9528, "step": 1823 }, { "epoch": 0.26818599522146663, "grad_norm": 0.541893482208252, "learning_rate": 7.859766277128548e-05, "loss": 1.4193, "step": 1824 }, { "epoch": 0.26833302701709244, "grad_norm": 0.5258090496063232, "learning_rate": 7.853088480801337e-05, "loss": 1.1108, "step": 1825 }, { "epoch": 0.26848005881271825, "grad_norm": 0.4248671531677246, "learning_rate": 7.846410684474124e-05, "loss": 1.2814, "step": 1826 }, { "epoch": 0.26862709060834405, "grad_norm": 0.5643646717071533, "learning_rate": 7.839732888146912e-05, "loss": 0.9957, "step": 1827 }, { "epoch": 0.26877412240396986, "grad_norm": 0.4703482985496521, "learning_rate": 7.8330550918197e-05, "loss": 1.5355, "step": 1828 }, { "epoch": 0.26892115419959567, "grad_norm": 0.585693895816803, "learning_rate": 7.826377295492487e-05, "loss": 1.0412, "step": 1829 }, { "epoch": 0.2690681859952215, "grad_norm": 0.45199042558670044, "learning_rate": 7.819699499165276e-05, "loss": 1.4616, "step": 1830 }, { "epoch": 0.2692152177908473, "grad_norm": 0.41657865047454834, "learning_rate": 7.813021702838063e-05, "loss": 1.5411, "step": 1831 }, { "epoch": 0.2693622495864731, "grad_norm": 0.535971462726593, "learning_rate": 7.806343906510852e-05, "loss": 1.3611, "step": 1832 }, { "epoch": 0.2695092813820989, "grad_norm": 0.43932461738586426, "learning_rate": 7.79966611018364e-05, "loss": 1.118, "step": 1833 }, { "epoch": 0.2696563131777247, "grad_norm": 0.5823005437850952, "learning_rate": 7.792988313856428e-05, "loss": 1.0836, "step": 1834 }, { "epoch": 0.2698033449733505, "grad_norm": 0.6429583430290222, "learning_rate": 7.786310517529216e-05, "loss": 0.8997, "step": 1835 }, { "epoch": 0.26995037676897626, "grad_norm": 0.6450183987617493, "learning_rate": 7.779632721202004e-05, "loss": 0.8588, "step": 1836 }, { "epoch": 0.27009740856460207, "grad_norm": 0.5128918886184692, "learning_rate": 7.772954924874791e-05, "loss": 1.3956, "step": 1837 }, { "epoch": 0.2702444403602279, "grad_norm": 0.475639283657074, "learning_rate": 7.76627712854758e-05, "loss": 1.5938, "step": 1838 }, { "epoch": 0.2703914721558537, "grad_norm": 0.5510367751121521, "learning_rate": 7.759599332220367e-05, "loss": 0.7098, "step": 1839 }, { "epoch": 0.2705385039514795, "grad_norm": 0.44962266087532043, "learning_rate": 7.752921535893156e-05, "loss": 1.1882, "step": 1840 }, { "epoch": 0.2706855357471053, "grad_norm": 0.47262415289878845, "learning_rate": 7.746243739565944e-05, "loss": 1.1075, "step": 1841 }, { "epoch": 0.2708325675427311, "grad_norm": 0.6161636114120483, "learning_rate": 7.739565943238732e-05, "loss": 1.2912, "step": 1842 }, { "epoch": 0.2709795993383569, "grad_norm": 0.5508948564529419, "learning_rate": 7.73288814691152e-05, "loss": 1.0259, "step": 1843 }, { "epoch": 0.2711266311339827, "grad_norm": 0.6116395592689514, "learning_rate": 7.726210350584308e-05, "loss": 1.0135, "step": 1844 }, { "epoch": 0.27127366292960853, "grad_norm": 0.7109143733978271, "learning_rate": 7.719532554257095e-05, "loss": 1.4457, "step": 1845 }, { "epoch": 0.27142069472523433, "grad_norm": 0.6997305750846863, "learning_rate": 7.712854757929884e-05, "loss": 1.453, "step": 1846 }, { "epoch": 0.27156772652086014, "grad_norm": 0.45557042956352234, "learning_rate": 7.706176961602671e-05, "loss": 1.0117, "step": 1847 }, { "epoch": 0.27171475831648595, "grad_norm": 0.4146955907344818, "learning_rate": 7.69949916527546e-05, "loss": 1.0621, "step": 1848 }, { "epoch": 0.27186179011211176, "grad_norm": 0.479611873626709, "learning_rate": 7.692821368948247e-05, "loss": 1.4988, "step": 1849 }, { "epoch": 0.27200882190773756, "grad_norm": 0.4496639668941498, "learning_rate": 7.686143572621036e-05, "loss": 1.1439, "step": 1850 }, { "epoch": 0.27215585370336337, "grad_norm": 0.5634199380874634, "learning_rate": 7.679465776293824e-05, "loss": 1.2681, "step": 1851 }, { "epoch": 0.2723028854989892, "grad_norm": 0.496499627828598, "learning_rate": 7.672787979966612e-05, "loss": 1.2921, "step": 1852 }, { "epoch": 0.272449917294615, "grad_norm": 0.5641376972198486, "learning_rate": 7.666110183639399e-05, "loss": 0.8242, "step": 1853 }, { "epoch": 0.2725969490902408, "grad_norm": 0.6980021595954895, "learning_rate": 7.659432387312188e-05, "loss": 1.494, "step": 1854 }, { "epoch": 0.27274398088586654, "grad_norm": 0.6638109087944031, "learning_rate": 7.652754590984975e-05, "loss": 1.1921, "step": 1855 }, { "epoch": 0.27289101268149235, "grad_norm": 0.4116935729980469, "learning_rate": 7.646076794657764e-05, "loss": 1.5306, "step": 1856 }, { "epoch": 0.27303804447711816, "grad_norm": 0.4734438359737396, "learning_rate": 7.639398998330551e-05, "loss": 1.0976, "step": 1857 }, { "epoch": 0.27318507627274397, "grad_norm": 0.5035160183906555, "learning_rate": 7.63272120200334e-05, "loss": 1.0758, "step": 1858 }, { "epoch": 0.2733321080683698, "grad_norm": 0.5227423906326294, "learning_rate": 7.626043405676128e-05, "loss": 1.0867, "step": 1859 }, { "epoch": 0.2734791398639956, "grad_norm": 0.7425376176834106, "learning_rate": 7.619365609348916e-05, "loss": 1.4417, "step": 1860 }, { "epoch": 0.2736261716596214, "grad_norm": 0.6616160273551941, "learning_rate": 7.612687813021703e-05, "loss": 1.1614, "step": 1861 }, { "epoch": 0.2737732034552472, "grad_norm": 0.4764360785484314, "learning_rate": 7.606010016694492e-05, "loss": 1.5575, "step": 1862 }, { "epoch": 0.273920235250873, "grad_norm": 0.4755435287952423, "learning_rate": 7.599332220367279e-05, "loss": 1.0412, "step": 1863 }, { "epoch": 0.2740672670464988, "grad_norm": 0.385008305311203, "learning_rate": 7.592654424040068e-05, "loss": 1.7892, "step": 1864 }, { "epoch": 0.2742142988421246, "grad_norm": 0.7128744721412659, "learning_rate": 7.585976627712855e-05, "loss": 1.0274, "step": 1865 }, { "epoch": 0.2743613306377504, "grad_norm": 0.4306547939777374, "learning_rate": 7.579298831385642e-05, "loss": 1.5534, "step": 1866 }, { "epoch": 0.27450836243337623, "grad_norm": 0.5952239632606506, "learning_rate": 7.572621035058431e-05, "loss": 0.9246, "step": 1867 }, { "epoch": 0.27465539422900204, "grad_norm": 0.3634586036205292, "learning_rate": 7.56594323873122e-05, "loss": 1.4349, "step": 1868 }, { "epoch": 0.27480242602462784, "grad_norm": 0.4984404444694519, "learning_rate": 7.559265442404007e-05, "loss": 1.3715, "step": 1869 }, { "epoch": 0.27494945782025365, "grad_norm": 0.5565382242202759, "learning_rate": 7.552587646076796e-05, "loss": 1.2784, "step": 1870 }, { "epoch": 0.27509648961587946, "grad_norm": 0.6614664196968079, "learning_rate": 7.545909849749583e-05, "loss": 1.1293, "step": 1871 }, { "epoch": 0.27524352141150527, "grad_norm": 0.5753129124641418, "learning_rate": 7.539232053422371e-05, "loss": 1.2039, "step": 1872 }, { "epoch": 0.275390553207131, "grad_norm": 0.5802760720252991, "learning_rate": 7.532554257095159e-05, "loss": 1.5134, "step": 1873 }, { "epoch": 0.2755375850027568, "grad_norm": 0.568296492099762, "learning_rate": 7.525876460767946e-05, "loss": 1.4062, "step": 1874 }, { "epoch": 0.27568461679838263, "grad_norm": 0.5929498076438904, "learning_rate": 7.519198664440735e-05, "loss": 1.0533, "step": 1875 }, { "epoch": 0.27583164859400844, "grad_norm": 0.4687759578227997, "learning_rate": 7.512520868113523e-05, "loss": 1.0985, "step": 1876 }, { "epoch": 0.27597868038963425, "grad_norm": 0.9196427464485168, "learning_rate": 7.505843071786311e-05, "loss": 1.1953, "step": 1877 }, { "epoch": 0.27612571218526005, "grad_norm": 0.6411634683609009, "learning_rate": 7.4991652754591e-05, "loss": 1.5191, "step": 1878 }, { "epoch": 0.27627274398088586, "grad_norm": 0.6908376216888428, "learning_rate": 7.492487479131887e-05, "loss": 1.0889, "step": 1879 }, { "epoch": 0.27641977577651167, "grad_norm": 0.6760402321815491, "learning_rate": 7.485809682804675e-05, "loss": 0.9896, "step": 1880 }, { "epoch": 0.2765668075721375, "grad_norm": 0.5265118479728699, "learning_rate": 7.479131886477463e-05, "loss": 0.9531, "step": 1881 }, { "epoch": 0.2767138393677633, "grad_norm": 0.5493643283843994, "learning_rate": 7.47245409015025e-05, "loss": 1.1473, "step": 1882 }, { "epoch": 0.2768608711633891, "grad_norm": 0.5827049612998962, "learning_rate": 7.465776293823039e-05, "loss": 1.1449, "step": 1883 }, { "epoch": 0.2770079029590149, "grad_norm": 0.5407295823097229, "learning_rate": 7.459098497495826e-05, "loss": 1.5032, "step": 1884 }, { "epoch": 0.2771549347546407, "grad_norm": 0.7719463109970093, "learning_rate": 7.452420701168615e-05, "loss": 1.4961, "step": 1885 }, { "epoch": 0.2773019665502665, "grad_norm": 0.48682111501693726, "learning_rate": 7.445742904841403e-05, "loss": 0.7605, "step": 1886 }, { "epoch": 0.2774489983458923, "grad_norm": 0.6166440844535828, "learning_rate": 7.439065108514191e-05, "loss": 1.225, "step": 1887 }, { "epoch": 0.2775960301415181, "grad_norm": 0.5994965434074402, "learning_rate": 7.43238731218698e-05, "loss": 1.1843, "step": 1888 }, { "epoch": 0.27774306193714393, "grad_norm": 0.5836843848228455, "learning_rate": 7.425709515859767e-05, "loss": 1.2723, "step": 1889 }, { "epoch": 0.27789009373276974, "grad_norm": 0.7401608228683472, "learning_rate": 7.419031719532554e-05, "loss": 0.8633, "step": 1890 }, { "epoch": 0.2780371255283955, "grad_norm": 0.5599168539047241, "learning_rate": 7.412353923205343e-05, "loss": 1.1618, "step": 1891 }, { "epoch": 0.2781841573240213, "grad_norm": 0.6821097731590271, "learning_rate": 7.40567612687813e-05, "loss": 1.007, "step": 1892 }, { "epoch": 0.2783311891196471, "grad_norm": 0.46576976776123047, "learning_rate": 7.398998330550919e-05, "loss": 1.7113, "step": 1893 }, { "epoch": 0.2784782209152729, "grad_norm": 0.42279449105262756, "learning_rate": 7.392320534223707e-05, "loss": 1.6158, "step": 1894 }, { "epoch": 0.2786252527108987, "grad_norm": 0.4428022503852844, "learning_rate": 7.385642737896495e-05, "loss": 1.3739, "step": 1895 }, { "epoch": 0.2787722845065245, "grad_norm": 0.6098983287811279, "learning_rate": 7.378964941569283e-05, "loss": 1.1924, "step": 1896 }, { "epoch": 0.27891931630215033, "grad_norm": 0.5587924718856812, "learning_rate": 7.37228714524207e-05, "loss": 1.2879, "step": 1897 }, { "epoch": 0.27906634809777614, "grad_norm": 0.41118982434272766, "learning_rate": 7.365609348914858e-05, "loss": 1.1261, "step": 1898 }, { "epoch": 0.27921337989340195, "grad_norm": 0.4193499684333801, "learning_rate": 7.358931552587647e-05, "loss": 0.6757, "step": 1899 }, { "epoch": 0.27936041168902775, "grad_norm": 0.5715445280075073, "learning_rate": 7.352253756260434e-05, "loss": 1.0439, "step": 1900 }, { "epoch": 0.27950744348465356, "grad_norm": 0.5902449488639832, "learning_rate": 7.345575959933221e-05, "loss": 0.9435, "step": 1901 }, { "epoch": 0.27965447528027937, "grad_norm": 0.6230447888374329, "learning_rate": 7.33889816360601e-05, "loss": 1.0819, "step": 1902 }, { "epoch": 0.2798015070759052, "grad_norm": 0.5868757963180542, "learning_rate": 7.332220367278799e-05, "loss": 1.1459, "step": 1903 }, { "epoch": 0.279948538871531, "grad_norm": 0.41278520226478577, "learning_rate": 7.325542570951587e-05, "loss": 0.9912, "step": 1904 }, { "epoch": 0.2800955706671568, "grad_norm": 0.596552312374115, "learning_rate": 7.318864774624375e-05, "loss": 1.0013, "step": 1905 }, { "epoch": 0.2802426024627826, "grad_norm": 0.5396944284439087, "learning_rate": 7.312186978297162e-05, "loss": 1.5481, "step": 1906 }, { "epoch": 0.2803896342584084, "grad_norm": 0.46005991101264954, "learning_rate": 7.30550918196995e-05, "loss": 1.1353, "step": 1907 }, { "epoch": 0.2805366660540342, "grad_norm": 0.39450111985206604, "learning_rate": 7.298831385642738e-05, "loss": 1.4403, "step": 1908 }, { "epoch": 0.28068369784965996, "grad_norm": 0.5311219096183777, "learning_rate": 7.292153589315525e-05, "loss": 1.1241, "step": 1909 }, { "epoch": 0.28083072964528577, "grad_norm": 0.5125832557678223, "learning_rate": 7.285475792988314e-05, "loss": 1.0766, "step": 1910 }, { "epoch": 0.2809777614409116, "grad_norm": 0.7012507915496826, "learning_rate": 7.278797996661103e-05, "loss": 0.7766, "step": 1911 }, { "epoch": 0.2811247932365374, "grad_norm": 0.4763702154159546, "learning_rate": 7.272120200333891e-05, "loss": 1.5555, "step": 1912 }, { "epoch": 0.2812718250321632, "grad_norm": 0.5403204560279846, "learning_rate": 7.265442404006679e-05, "loss": 1.0164, "step": 1913 }, { "epoch": 0.281418856827789, "grad_norm": 0.5643818974494934, "learning_rate": 7.258764607679466e-05, "loss": 1.1818, "step": 1914 }, { "epoch": 0.2815658886234148, "grad_norm": 0.5021770596504211, "learning_rate": 7.252086811352255e-05, "loss": 0.9219, "step": 1915 }, { "epoch": 0.2817129204190406, "grad_norm": 0.46953412890434265, "learning_rate": 7.245409015025042e-05, "loss": 1.5228, "step": 1916 }, { "epoch": 0.2818599522146664, "grad_norm": 0.5795334577560425, "learning_rate": 7.238731218697829e-05, "loss": 0.883, "step": 1917 }, { "epoch": 0.2820069840102922, "grad_norm": 0.7381460666656494, "learning_rate": 7.232053422370618e-05, "loss": 1.2868, "step": 1918 }, { "epoch": 0.28215401580591803, "grad_norm": 0.6550155878067017, "learning_rate": 7.225375626043405e-05, "loss": 1.1615, "step": 1919 }, { "epoch": 0.28230104760154384, "grad_norm": 0.5407784581184387, "learning_rate": 7.218697829716194e-05, "loss": 1.1053, "step": 1920 }, { "epoch": 0.28244807939716965, "grad_norm": 0.599213182926178, "learning_rate": 7.212020033388982e-05, "loss": 1.167, "step": 1921 }, { "epoch": 0.28259511119279546, "grad_norm": 0.40606042742729187, "learning_rate": 7.20534223706177e-05, "loss": 1.6125, "step": 1922 }, { "epoch": 0.28274214298842126, "grad_norm": 0.5165541768074036, "learning_rate": 7.198664440734558e-05, "loss": 0.9891, "step": 1923 }, { "epoch": 0.28288917478404707, "grad_norm": 0.7490444183349609, "learning_rate": 7.191986644407346e-05, "loss": 0.9136, "step": 1924 }, { "epoch": 0.2830362065796729, "grad_norm": 0.5492925643920898, "learning_rate": 7.185308848080133e-05, "loss": 1.3952, "step": 1925 }, { "epoch": 0.2831832383752987, "grad_norm": 0.7423551082611084, "learning_rate": 7.178631051752922e-05, "loss": 1.1117, "step": 1926 }, { "epoch": 0.28333027017092444, "grad_norm": 0.6472764015197754, "learning_rate": 7.171953255425709e-05, "loss": 0.8736, "step": 1927 }, { "epoch": 0.28347730196655024, "grad_norm": 0.5720741152763367, "learning_rate": 7.165275459098498e-05, "loss": 1.0602, "step": 1928 }, { "epoch": 0.28362433376217605, "grad_norm": 0.5701491832733154, "learning_rate": 7.158597662771286e-05, "loss": 1.1701, "step": 1929 }, { "epoch": 0.28377136555780186, "grad_norm": 0.5187951922416687, "learning_rate": 7.151919866444074e-05, "loss": 1.4645, "step": 1930 }, { "epoch": 0.28391839735342767, "grad_norm": 0.5279660224914551, "learning_rate": 7.145242070116862e-05, "loss": 1.2076, "step": 1931 }, { "epoch": 0.28406542914905347, "grad_norm": 0.5809940099716187, "learning_rate": 7.13856427378965e-05, "loss": 1.2868, "step": 1932 }, { "epoch": 0.2842124609446793, "grad_norm": 0.6069310307502747, "learning_rate": 7.131886477462437e-05, "loss": 1.2219, "step": 1933 }, { "epoch": 0.2843594927403051, "grad_norm": 0.445905476808548, "learning_rate": 7.125208681135226e-05, "loss": 1.0007, "step": 1934 }, { "epoch": 0.2845065245359309, "grad_norm": 0.46322816610336304, "learning_rate": 7.118530884808013e-05, "loss": 1.3487, "step": 1935 }, { "epoch": 0.2846535563315567, "grad_norm": 0.5286208987236023, "learning_rate": 7.111853088480802e-05, "loss": 0.9648, "step": 1936 }, { "epoch": 0.2848005881271825, "grad_norm": 0.6910490393638611, "learning_rate": 7.105175292153589e-05, "loss": 1.1877, "step": 1937 }, { "epoch": 0.2849476199228083, "grad_norm": 0.42590999603271484, "learning_rate": 7.098497495826378e-05, "loss": 1.7435, "step": 1938 }, { "epoch": 0.2850946517184341, "grad_norm": 0.6263895630836487, "learning_rate": 7.091819699499166e-05, "loss": 1.2682, "step": 1939 }, { "epoch": 0.28524168351405993, "grad_norm": 0.671818196773529, "learning_rate": 7.085141903171954e-05, "loss": 1.2499, "step": 1940 }, { "epoch": 0.28538871530968574, "grad_norm": 0.5737487077713013, "learning_rate": 7.078464106844741e-05, "loss": 0.9762, "step": 1941 }, { "epoch": 0.28553574710531154, "grad_norm": 0.4834975600242615, "learning_rate": 7.07178631051753e-05, "loss": 0.9988, "step": 1942 }, { "epoch": 0.28568277890093735, "grad_norm": 0.6684873700141907, "learning_rate": 7.065108514190317e-05, "loss": 1.0235, "step": 1943 }, { "epoch": 0.28582981069656316, "grad_norm": 0.44444718956947327, "learning_rate": 7.058430717863106e-05, "loss": 1.0099, "step": 1944 }, { "epoch": 0.2859768424921889, "grad_norm": 0.6976968050003052, "learning_rate": 7.051752921535893e-05, "loss": 1.3772, "step": 1945 }, { "epoch": 0.2861238742878147, "grad_norm": 0.6606600880622864, "learning_rate": 7.045075125208682e-05, "loss": 1.0105, "step": 1946 }, { "epoch": 0.2862709060834405, "grad_norm": 0.49308377504348755, "learning_rate": 7.03839732888147e-05, "loss": 1.8105, "step": 1947 }, { "epoch": 0.28641793787906633, "grad_norm": 0.474101185798645, "learning_rate": 7.031719532554258e-05, "loss": 1.708, "step": 1948 }, { "epoch": 0.28656496967469214, "grad_norm": 0.6242483854293823, "learning_rate": 7.025041736227045e-05, "loss": 1.0688, "step": 1949 }, { "epoch": 0.28671200147031795, "grad_norm": 0.6197249293327332, "learning_rate": 7.018363939899834e-05, "loss": 0.9479, "step": 1950 }, { "epoch": 0.28685903326594375, "grad_norm": 0.608387291431427, "learning_rate": 7.011686143572621e-05, "loss": 0.8758, "step": 1951 }, { "epoch": 0.28700606506156956, "grad_norm": 0.525964081287384, "learning_rate": 7.00500834724541e-05, "loss": 1.5912, "step": 1952 }, { "epoch": 0.28715309685719537, "grad_norm": 0.41274020075798035, "learning_rate": 6.998330550918197e-05, "loss": 1.6357, "step": 1953 }, { "epoch": 0.2873001286528212, "grad_norm": 0.47561508417129517, "learning_rate": 6.991652754590986e-05, "loss": 1.0596, "step": 1954 }, { "epoch": 0.287447160448447, "grad_norm": 0.6381984949111938, "learning_rate": 6.984974958263774e-05, "loss": 0.9938, "step": 1955 }, { "epoch": 0.2875941922440728, "grad_norm": 0.6282967329025269, "learning_rate": 6.978297161936562e-05, "loss": 1.1007, "step": 1956 }, { "epoch": 0.2877412240396986, "grad_norm": 0.47016745805740356, "learning_rate": 6.971619365609349e-05, "loss": 0.9521, "step": 1957 }, { "epoch": 0.2878882558353244, "grad_norm": 0.8224629163742065, "learning_rate": 6.964941569282138e-05, "loss": 1.1185, "step": 1958 }, { "epoch": 0.2880352876309502, "grad_norm": 0.6366493701934814, "learning_rate": 6.958263772954925e-05, "loss": 0.9732, "step": 1959 }, { "epoch": 0.288182319426576, "grad_norm": 0.48568084836006165, "learning_rate": 6.951585976627714e-05, "loss": 1.336, "step": 1960 }, { "epoch": 0.2883293512222018, "grad_norm": 0.3265235424041748, "learning_rate": 6.944908180300501e-05, "loss": 1.5803, "step": 1961 }, { "epoch": 0.28847638301782763, "grad_norm": 0.4930456280708313, "learning_rate": 6.938230383973288e-05, "loss": 1.4937, "step": 1962 }, { "epoch": 0.2886234148134534, "grad_norm": 0.8526232838630676, "learning_rate": 6.931552587646077e-05, "loss": 1.1944, "step": 1963 }, { "epoch": 0.2887704466090792, "grad_norm": 0.5472308397293091, "learning_rate": 6.924874791318865e-05, "loss": 1.139, "step": 1964 }, { "epoch": 0.288917478404705, "grad_norm": 0.7131630182266235, "learning_rate": 6.918196994991654e-05, "loss": 0.9071, "step": 1965 }, { "epoch": 0.2890645102003308, "grad_norm": 0.5545789003372192, "learning_rate": 6.911519198664441e-05, "loss": 1.3795, "step": 1966 }, { "epoch": 0.2892115419959566, "grad_norm": 0.5522712469100952, "learning_rate": 6.904841402337229e-05, "loss": 1.2204, "step": 1967 }, { "epoch": 0.2893585737915824, "grad_norm": 0.5298013091087341, "learning_rate": 6.898163606010017e-05, "loss": 1.1277, "step": 1968 }, { "epoch": 0.2895056055872082, "grad_norm": 0.4840368628501892, "learning_rate": 6.891485809682805e-05, "loss": 1.4353, "step": 1969 }, { "epoch": 0.28965263738283403, "grad_norm": 0.6055407524108887, "learning_rate": 6.884808013355592e-05, "loss": 0.8174, "step": 1970 }, { "epoch": 0.28979966917845984, "grad_norm": 0.5609264373779297, "learning_rate": 6.878130217028381e-05, "loss": 1.5731, "step": 1971 }, { "epoch": 0.28994670097408565, "grad_norm": 0.5264747738838196, "learning_rate": 6.87145242070117e-05, "loss": 0.9938, "step": 1972 }, { "epoch": 0.29009373276971145, "grad_norm": 0.6650087237358093, "learning_rate": 6.864774624373958e-05, "loss": 1.086, "step": 1973 }, { "epoch": 0.29024076456533726, "grad_norm": 0.5936080813407898, "learning_rate": 6.858096828046745e-05, "loss": 1.3311, "step": 1974 }, { "epoch": 0.29038779636096307, "grad_norm": 0.47604304552078247, "learning_rate": 6.851419031719533e-05, "loss": 1.1654, "step": 1975 }, { "epoch": 0.2905348281565889, "grad_norm": 0.5561505556106567, "learning_rate": 6.844741235392321e-05, "loss": 1.2989, "step": 1976 }, { "epoch": 0.2906818599522147, "grad_norm": 0.5516951084136963, "learning_rate": 6.838063439065109e-05, "loss": 1.0855, "step": 1977 }, { "epoch": 0.2908288917478405, "grad_norm": 0.49726325273513794, "learning_rate": 6.831385642737896e-05, "loss": 1.2547, "step": 1978 }, { "epoch": 0.2909759235434663, "grad_norm": 0.6941009163856506, "learning_rate": 6.824707846410685e-05, "loss": 1.2564, "step": 1979 }, { "epoch": 0.2911229553390921, "grad_norm": 0.9704086780548096, "learning_rate": 6.818030050083472e-05, "loss": 1.329, "step": 1980 }, { "epoch": 0.29126998713471786, "grad_norm": 0.5602558255195618, "learning_rate": 6.811352253756261e-05, "loss": 1.1648, "step": 1981 }, { "epoch": 0.29141701893034366, "grad_norm": 0.41304153203964233, "learning_rate": 6.80467445742905e-05, "loss": 1.2734, "step": 1982 }, { "epoch": 0.29156405072596947, "grad_norm": 0.6137445569038391, "learning_rate": 6.797996661101837e-05, "loss": 1.3662, "step": 1983 }, { "epoch": 0.2917110825215953, "grad_norm": 0.5062145590782166, "learning_rate": 6.791318864774625e-05, "loss": 1.4159, "step": 1984 }, { "epoch": 0.2918581143172211, "grad_norm": 0.5087314248085022, "learning_rate": 6.784641068447413e-05, "loss": 1.439, "step": 1985 }, { "epoch": 0.2920051461128469, "grad_norm": 0.5006468892097473, "learning_rate": 6.7779632721202e-05, "loss": 1.0333, "step": 1986 }, { "epoch": 0.2921521779084727, "grad_norm": 0.5835219621658325, "learning_rate": 6.771285475792989e-05, "loss": 0.8918, "step": 1987 }, { "epoch": 0.2922992097040985, "grad_norm": 0.7039252519607544, "learning_rate": 6.764607679465776e-05, "loss": 0.9891, "step": 1988 }, { "epoch": 0.2924462414997243, "grad_norm": 0.4339163899421692, "learning_rate": 6.757929883138565e-05, "loss": 1.6861, "step": 1989 }, { "epoch": 0.2925932732953501, "grad_norm": 0.5073337554931641, "learning_rate": 6.751252086811353e-05, "loss": 1.2259, "step": 1990 }, { "epoch": 0.2927403050909759, "grad_norm": 0.4264913499355316, "learning_rate": 6.74457429048414e-05, "loss": 1.2021, "step": 1991 }, { "epoch": 0.29288733688660173, "grad_norm": 0.5056177377700806, "learning_rate": 6.737896494156929e-05, "loss": 1.3878, "step": 1992 }, { "epoch": 0.29303436868222754, "grad_norm": 0.5917510390281677, "learning_rate": 6.731218697829717e-05, "loss": 1.3334, "step": 1993 }, { "epoch": 0.29318140047785335, "grad_norm": 0.5753657221794128, "learning_rate": 6.724540901502504e-05, "loss": 1.347, "step": 1994 }, { "epoch": 0.29332843227347916, "grad_norm": 0.5680025219917297, "learning_rate": 6.717863105175293e-05, "loss": 1.399, "step": 1995 }, { "epoch": 0.29347546406910496, "grad_norm": 0.6970915794372559, "learning_rate": 6.71118530884808e-05, "loss": 1.1509, "step": 1996 }, { "epoch": 0.29362249586473077, "grad_norm": 0.6334583163261414, "learning_rate": 6.704507512520869e-05, "loss": 1.1426, "step": 1997 }, { "epoch": 0.2937695276603566, "grad_norm": 0.8059874176979065, "learning_rate": 6.697829716193656e-05, "loss": 1.3639, "step": 1998 }, { "epoch": 0.29391655945598233, "grad_norm": 0.6556105017662048, "learning_rate": 6.691151919866445e-05, "loss": 1.591, "step": 1999 }, { "epoch": 0.29406359125160814, "grad_norm": 0.6405893564224243, "learning_rate": 6.684474123539233e-05, "loss": 1.0621, "step": 2000 }, { "epoch": 0.29421062304723394, "grad_norm": 0.6156466603279114, "learning_rate": 6.67779632721202e-05, "loss": 1.3127, "step": 2001 }, { "epoch": 0.29435765484285975, "grad_norm": 0.679510772228241, "learning_rate": 6.671118530884808e-05, "loss": 0.8802, "step": 2002 }, { "epoch": 0.29450468663848556, "grad_norm": 0.4807721674442291, "learning_rate": 6.664440734557597e-05, "loss": 1.1643, "step": 2003 }, { "epoch": 0.29465171843411136, "grad_norm": 0.5710380673408508, "learning_rate": 6.657762938230384e-05, "loss": 1.1552, "step": 2004 }, { "epoch": 0.29479875022973717, "grad_norm": 0.6404958367347717, "learning_rate": 6.651085141903173e-05, "loss": 1.0163, "step": 2005 }, { "epoch": 0.294945782025363, "grad_norm": 0.5454883575439453, "learning_rate": 6.64440734557596e-05, "loss": 1.055, "step": 2006 }, { "epoch": 0.2950928138209888, "grad_norm": 0.745124340057373, "learning_rate": 6.637729549248749e-05, "loss": 1.1295, "step": 2007 }, { "epoch": 0.2952398456166146, "grad_norm": 0.6523913741111755, "learning_rate": 6.631051752921537e-05, "loss": 1.2585, "step": 2008 }, { "epoch": 0.2953868774122404, "grad_norm": 0.5812882781028748, "learning_rate": 6.624373956594325e-05, "loss": 1.2048, "step": 2009 }, { "epoch": 0.2955339092078662, "grad_norm": 0.5113157033920288, "learning_rate": 6.617696160267112e-05, "loss": 1.518, "step": 2010 }, { "epoch": 0.295680941003492, "grad_norm": 0.3844226598739624, "learning_rate": 6.6110183639399e-05, "loss": 1.1334, "step": 2011 }, { "epoch": 0.2958279727991178, "grad_norm": 0.5992608070373535, "learning_rate": 6.604340567612688e-05, "loss": 1.7081, "step": 2012 }, { "epoch": 0.29597500459474363, "grad_norm": 0.6703015565872192, "learning_rate": 6.597662771285476e-05, "loss": 1.1633, "step": 2013 }, { "epoch": 0.29612203639036944, "grad_norm": 0.5947973728179932, "learning_rate": 6.590984974958264e-05, "loss": 1.0311, "step": 2014 }, { "epoch": 0.29626906818599524, "grad_norm": 0.5306417346000671, "learning_rate": 6.584307178631051e-05, "loss": 1.6208, "step": 2015 }, { "epoch": 0.29641609998162105, "grad_norm": 0.43819659948349, "learning_rate": 6.57762938230384e-05, "loss": 1.6636, "step": 2016 }, { "epoch": 0.2965631317772468, "grad_norm": 0.7657707929611206, "learning_rate": 6.570951585976628e-05, "loss": 1.0667, "step": 2017 }, { "epoch": 0.2967101635728726, "grad_norm": 0.5431534647941589, "learning_rate": 6.564273789649416e-05, "loss": 1.1846, "step": 2018 }, { "epoch": 0.2968571953684984, "grad_norm": 0.591914176940918, "learning_rate": 6.557595993322204e-05, "loss": 0.9365, "step": 2019 }, { "epoch": 0.2970042271641242, "grad_norm": 0.5348078608512878, "learning_rate": 6.550918196994992e-05, "loss": 1.1245, "step": 2020 }, { "epoch": 0.29715125895975003, "grad_norm": 0.6871307492256165, "learning_rate": 6.54424040066778e-05, "loss": 1.2163, "step": 2021 }, { "epoch": 0.29729829075537584, "grad_norm": 0.5329853296279907, "learning_rate": 6.537562604340568e-05, "loss": 1.3231, "step": 2022 }, { "epoch": 0.29744532255100165, "grad_norm": 0.45578184723854065, "learning_rate": 6.530884808013355e-05, "loss": 1.246, "step": 2023 }, { "epoch": 0.29759235434662745, "grad_norm": 0.5610411167144775, "learning_rate": 6.524207011686144e-05, "loss": 1.0161, "step": 2024 }, { "epoch": 0.29773938614225326, "grad_norm": 0.55032879114151, "learning_rate": 6.517529215358932e-05, "loss": 1.2007, "step": 2025 }, { "epoch": 0.29788641793787907, "grad_norm": 0.5526856780052185, "learning_rate": 6.51085141903172e-05, "loss": 1.3606, "step": 2026 }, { "epoch": 0.2980334497335049, "grad_norm": 0.5738084316253662, "learning_rate": 6.504173622704508e-05, "loss": 0.9945, "step": 2027 }, { "epoch": 0.2981804815291307, "grad_norm": 0.39906975626945496, "learning_rate": 6.497495826377296e-05, "loss": 1.6652, "step": 2028 }, { "epoch": 0.2983275133247565, "grad_norm": 0.5410943627357483, "learning_rate": 6.490818030050084e-05, "loss": 1.1994, "step": 2029 }, { "epoch": 0.2984745451203823, "grad_norm": 0.5782895088195801, "learning_rate": 6.484140233722872e-05, "loss": 1.2583, "step": 2030 }, { "epoch": 0.2986215769160081, "grad_norm": 0.7866604924201965, "learning_rate": 6.477462437395659e-05, "loss": 1.1222, "step": 2031 }, { "epoch": 0.2987686087116339, "grad_norm": 0.6570521593093872, "learning_rate": 6.470784641068448e-05, "loss": 1.1293, "step": 2032 }, { "epoch": 0.2989156405072597, "grad_norm": 0.4779338538646698, "learning_rate": 6.464106844741235e-05, "loss": 1.0697, "step": 2033 }, { "epoch": 0.2990626723028855, "grad_norm": 0.714025616645813, "learning_rate": 6.457429048414024e-05, "loss": 1.2947, "step": 2034 }, { "epoch": 0.2992097040985113, "grad_norm": 0.6599497199058533, "learning_rate": 6.450751252086812e-05, "loss": 1.3078, "step": 2035 }, { "epoch": 0.2993567358941371, "grad_norm": 0.6218534708023071, "learning_rate": 6.4440734557596e-05, "loss": 1.049, "step": 2036 }, { "epoch": 0.2995037676897629, "grad_norm": 0.5149571895599365, "learning_rate": 6.437395659432388e-05, "loss": 0.9912, "step": 2037 }, { "epoch": 0.2996507994853887, "grad_norm": 0.6225073933601379, "learning_rate": 6.430717863105176e-05, "loss": 1.1043, "step": 2038 }, { "epoch": 0.2997978312810145, "grad_norm": 0.5702974796295166, "learning_rate": 6.424040066777963e-05, "loss": 1.3507, "step": 2039 }, { "epoch": 0.2999448630766403, "grad_norm": 0.5927395224571228, "learning_rate": 6.417362270450752e-05, "loss": 1.0864, "step": 2040 }, { "epoch": 0.3000918948722661, "grad_norm": 0.5591736435890198, "learning_rate": 6.410684474123539e-05, "loss": 1.3127, "step": 2041 }, { "epoch": 0.3002389266678919, "grad_norm": 0.5282946825027466, "learning_rate": 6.404006677796328e-05, "loss": 1.3726, "step": 2042 }, { "epoch": 0.30038595846351773, "grad_norm": 0.6433884501457214, "learning_rate": 6.397328881469116e-05, "loss": 1.0879, "step": 2043 }, { "epoch": 0.30053299025914354, "grad_norm": 0.5745020508766174, "learning_rate": 6.390651085141904e-05, "loss": 1.4242, "step": 2044 }, { "epoch": 0.30068002205476935, "grad_norm": 0.5236560702323914, "learning_rate": 6.383973288814692e-05, "loss": 1.2185, "step": 2045 }, { "epoch": 0.30082705385039515, "grad_norm": 0.5411531329154968, "learning_rate": 6.37729549248748e-05, "loss": 1.0304, "step": 2046 }, { "epoch": 0.30097408564602096, "grad_norm": 0.5680497884750366, "learning_rate": 6.370617696160267e-05, "loss": 0.9687, "step": 2047 }, { "epoch": 0.30112111744164677, "grad_norm": 0.6116782426834106, "learning_rate": 6.363939899833056e-05, "loss": 1.2464, "step": 2048 }, { "epoch": 0.3012681492372726, "grad_norm": 0.8245232701301575, "learning_rate": 6.357262103505843e-05, "loss": 1.021, "step": 2049 }, { "epoch": 0.3014151810328984, "grad_norm": 0.5428927540779114, "learning_rate": 6.35058430717863e-05, "loss": 1.1814, "step": 2050 }, { "epoch": 0.3015622128285242, "grad_norm": 0.5788788199424744, "learning_rate": 6.343906510851419e-05, "loss": 1.0649, "step": 2051 }, { "epoch": 0.30170924462415, "grad_norm": 0.4899301826953888, "learning_rate": 6.337228714524208e-05, "loss": 1.1186, "step": 2052 }, { "epoch": 0.3018562764197758, "grad_norm": 0.7030068039894104, "learning_rate": 6.330550918196996e-05, "loss": 1.2243, "step": 2053 }, { "epoch": 0.30200330821540156, "grad_norm": 0.44717729091644287, "learning_rate": 6.323873121869784e-05, "loss": 1.3291, "step": 2054 }, { "epoch": 0.30215034001102736, "grad_norm": 0.5356654524803162, "learning_rate": 6.317195325542571e-05, "loss": 1.2629, "step": 2055 }, { "epoch": 0.30229737180665317, "grad_norm": 0.5388799905776978, "learning_rate": 6.31051752921536e-05, "loss": 1.1402, "step": 2056 }, { "epoch": 0.302444403602279, "grad_norm": 0.5928915143013, "learning_rate": 6.303839732888147e-05, "loss": 1.2784, "step": 2057 }, { "epoch": 0.3025914353979048, "grad_norm": 0.48179319500923157, "learning_rate": 6.297161936560934e-05, "loss": 1.4108, "step": 2058 }, { "epoch": 0.3027384671935306, "grad_norm": 0.6406283378601074, "learning_rate": 6.290484140233723e-05, "loss": 1.0483, "step": 2059 }, { "epoch": 0.3028854989891564, "grad_norm": 0.7895448207855225, "learning_rate": 6.283806343906511e-05, "loss": 0.9722, "step": 2060 }, { "epoch": 0.3030325307847822, "grad_norm": 0.6515761613845825, "learning_rate": 6.2771285475793e-05, "loss": 0.9609, "step": 2061 }, { "epoch": 0.303179562580408, "grad_norm": 0.7410962581634521, "learning_rate": 6.270450751252087e-05, "loss": 1.0645, "step": 2062 }, { "epoch": 0.3033265943760338, "grad_norm": 0.48362237215042114, "learning_rate": 6.263772954924875e-05, "loss": 1.102, "step": 2063 }, { "epoch": 0.3034736261716596, "grad_norm": 0.47745728492736816, "learning_rate": 6.257095158597663e-05, "loss": 1.304, "step": 2064 }, { "epoch": 0.30362065796728543, "grad_norm": 0.5258874893188477, "learning_rate": 6.250417362270451e-05, "loss": 1.0681, "step": 2065 }, { "epoch": 0.30376768976291124, "grad_norm": 0.6379144787788391, "learning_rate": 6.243739565943238e-05, "loss": 1.053, "step": 2066 }, { "epoch": 0.30391472155853705, "grad_norm": 0.6102416515350342, "learning_rate": 6.237061769616027e-05, "loss": 1.1269, "step": 2067 }, { "epoch": 0.30406175335416286, "grad_norm": 0.6419486999511719, "learning_rate": 6.230383973288815e-05, "loss": 1.2855, "step": 2068 }, { "epoch": 0.30420878514978866, "grad_norm": 0.5650056004524231, "learning_rate": 6.223706176961604e-05, "loss": 1.1542, "step": 2069 }, { "epoch": 0.30435581694541447, "grad_norm": 0.44635745882987976, "learning_rate": 6.217028380634391e-05, "loss": 1.5572, "step": 2070 }, { "epoch": 0.3045028487410403, "grad_norm": 0.4978647530078888, "learning_rate": 6.210350584307179e-05, "loss": 0.953, "step": 2071 }, { "epoch": 0.30464988053666603, "grad_norm": 0.40722164511680603, "learning_rate": 6.203672787979967e-05, "loss": 1.3243, "step": 2072 }, { "epoch": 0.30479691233229184, "grad_norm": 0.5938816666603088, "learning_rate": 6.196994991652755e-05, "loss": 1.2501, "step": 2073 }, { "epoch": 0.30494394412791764, "grad_norm": 0.6482542753219604, "learning_rate": 6.190317195325542e-05, "loss": 1.3253, "step": 2074 }, { "epoch": 0.30509097592354345, "grad_norm": 0.5123575329780579, "learning_rate": 6.183639398998331e-05, "loss": 1.3919, "step": 2075 }, { "epoch": 0.30523800771916926, "grad_norm": 0.4818158447742462, "learning_rate": 6.176961602671118e-05, "loss": 1.2156, "step": 2076 }, { "epoch": 0.30538503951479506, "grad_norm": 0.5782576203346252, "learning_rate": 6.170283806343907e-05, "loss": 1.2671, "step": 2077 }, { "epoch": 0.30553207131042087, "grad_norm": 0.587410032749176, "learning_rate": 6.163606010016695e-05, "loss": 1.4907, "step": 2078 }, { "epoch": 0.3056791031060467, "grad_norm": 0.6768604516983032, "learning_rate": 6.156928213689483e-05, "loss": 0.9671, "step": 2079 }, { "epoch": 0.3058261349016725, "grad_norm": 0.4892834424972534, "learning_rate": 6.150250417362271e-05, "loss": 1.3649, "step": 2080 }, { "epoch": 0.3059731666972983, "grad_norm": 0.47309792041778564, "learning_rate": 6.143572621035059e-05, "loss": 1.2456, "step": 2081 }, { "epoch": 0.3061201984929241, "grad_norm": 0.5504151582717896, "learning_rate": 6.136894824707846e-05, "loss": 1.5576, "step": 2082 }, { "epoch": 0.3062672302885499, "grad_norm": 0.6047011613845825, "learning_rate": 6.130217028380635e-05, "loss": 1.1514, "step": 2083 }, { "epoch": 0.3064142620841757, "grad_norm": 0.51165771484375, "learning_rate": 6.123539232053422e-05, "loss": 1.1545, "step": 2084 }, { "epoch": 0.3065612938798015, "grad_norm": 0.48656582832336426, "learning_rate": 6.11686143572621e-05, "loss": 0.9527, "step": 2085 }, { "epoch": 0.30670832567542733, "grad_norm": 0.401237815618515, "learning_rate": 6.110183639398999e-05, "loss": 1.2807, "step": 2086 }, { "epoch": 0.30685535747105314, "grad_norm": 0.5683399438858032, "learning_rate": 6.103505843071786e-05, "loss": 1.0761, "step": 2087 }, { "epoch": 0.30700238926667894, "grad_norm": 0.46019262075424194, "learning_rate": 6.0968280467445746e-05, "loss": 1.382, "step": 2088 }, { "epoch": 0.30714942106230475, "grad_norm": 0.4964863955974579, "learning_rate": 6.0901502504173626e-05, "loss": 1.8669, "step": 2089 }, { "epoch": 0.3072964528579305, "grad_norm": 0.6929451823234558, "learning_rate": 6.083472454090151e-05, "loss": 1.1238, "step": 2090 }, { "epoch": 0.3074434846535563, "grad_norm": 0.523923933506012, "learning_rate": 6.0767946577629386e-05, "loss": 1.0984, "step": 2091 }, { "epoch": 0.3075905164491821, "grad_norm": 0.463485449552536, "learning_rate": 6.070116861435726e-05, "loss": 1.36, "step": 2092 }, { "epoch": 0.3077375482448079, "grad_norm": 0.6514401435852051, "learning_rate": 6.0634390651085146e-05, "loss": 1.3155, "step": 2093 }, { "epoch": 0.30788458004043373, "grad_norm": 0.6040098667144775, "learning_rate": 6.0567612687813026e-05, "loss": 1.4756, "step": 2094 }, { "epoch": 0.30803161183605954, "grad_norm": 0.4662008285522461, "learning_rate": 6.05008347245409e-05, "loss": 1.0722, "step": 2095 }, { "epoch": 0.30817864363168535, "grad_norm": 0.6664574146270752, "learning_rate": 6.0434056761268785e-05, "loss": 1.1976, "step": 2096 }, { "epoch": 0.30832567542731115, "grad_norm": 0.6326817274093628, "learning_rate": 6.0367278797996665e-05, "loss": 1.5082, "step": 2097 }, { "epoch": 0.30847270722293696, "grad_norm": 0.4981769919395447, "learning_rate": 6.030050083472455e-05, "loss": 1.15, "step": 2098 }, { "epoch": 0.30861973901856277, "grad_norm": 0.4260229170322418, "learning_rate": 6.0233722871452425e-05, "loss": 1.5063, "step": 2099 }, { "epoch": 0.3087667708141886, "grad_norm": 0.5843422412872314, "learning_rate": 6.01669449081803e-05, "loss": 1.2405, "step": 2100 }, { "epoch": 0.3089138026098144, "grad_norm": 0.806702196598053, "learning_rate": 6.0100166944908185e-05, "loss": 1.1449, "step": 2101 }, { "epoch": 0.3090608344054402, "grad_norm": 0.5783898234367371, "learning_rate": 6.0033388981636065e-05, "loss": 1.2454, "step": 2102 }, { "epoch": 0.309207866201066, "grad_norm": 0.5278841853141785, "learning_rate": 5.996661101836394e-05, "loss": 1.2367, "step": 2103 }, { "epoch": 0.3093548979966918, "grad_norm": 0.6454164981842041, "learning_rate": 5.9899833055091825e-05, "loss": 1.1, "step": 2104 }, { "epoch": 0.3095019297923176, "grad_norm": 0.5634920597076416, "learning_rate": 5.98330550918197e-05, "loss": 1.2146, "step": 2105 }, { "epoch": 0.3096489615879434, "grad_norm": 0.6203880310058594, "learning_rate": 5.9766277128547585e-05, "loss": 1.2592, "step": 2106 }, { "epoch": 0.3097959933835692, "grad_norm": 0.47032856941223145, "learning_rate": 5.9699499165275465e-05, "loss": 1.0643, "step": 2107 }, { "epoch": 0.309943025179195, "grad_norm": 0.7910390496253967, "learning_rate": 5.963272120200334e-05, "loss": 1.0906, "step": 2108 }, { "epoch": 0.3100900569748208, "grad_norm": 0.5974732637405396, "learning_rate": 5.9565943238731224e-05, "loss": 1.0601, "step": 2109 }, { "epoch": 0.3102370887704466, "grad_norm": 0.4613460898399353, "learning_rate": 5.9499165275459104e-05, "loss": 1.1826, "step": 2110 }, { "epoch": 0.3103841205660724, "grad_norm": 0.5288681387901306, "learning_rate": 5.943238731218698e-05, "loss": 1.1737, "step": 2111 }, { "epoch": 0.3105311523616982, "grad_norm": 0.6969495415687561, "learning_rate": 5.9365609348914864e-05, "loss": 1.1476, "step": 2112 }, { "epoch": 0.310678184157324, "grad_norm": 0.47483837604522705, "learning_rate": 5.929883138564274e-05, "loss": 1.019, "step": 2113 }, { "epoch": 0.3108252159529498, "grad_norm": 0.5272612571716309, "learning_rate": 5.9232053422370624e-05, "loss": 1.3846, "step": 2114 }, { "epoch": 0.3109722477485756, "grad_norm": 0.475140243768692, "learning_rate": 5.9165275459098504e-05, "loss": 1.0571, "step": 2115 }, { "epoch": 0.31111927954420143, "grad_norm": 0.4777458608150482, "learning_rate": 5.909849749582638e-05, "loss": 1.1352, "step": 2116 }, { "epoch": 0.31126631133982724, "grad_norm": 0.5948438048362732, "learning_rate": 5.9031719532554264e-05, "loss": 1.3317, "step": 2117 }, { "epoch": 0.31141334313545305, "grad_norm": 0.5738540887832642, "learning_rate": 5.896494156928214e-05, "loss": 1.7668, "step": 2118 }, { "epoch": 0.31156037493107885, "grad_norm": 0.46423032879829407, "learning_rate": 5.889816360601002e-05, "loss": 1.184, "step": 2119 }, { "epoch": 0.31170740672670466, "grad_norm": 0.8362725973129272, "learning_rate": 5.8831385642737904e-05, "loss": 1.1758, "step": 2120 }, { "epoch": 0.31185443852233047, "grad_norm": 0.5988175868988037, "learning_rate": 5.876460767946578e-05, "loss": 1.2444, "step": 2121 }, { "epoch": 0.3120014703179563, "grad_norm": 0.6251406669616699, "learning_rate": 5.8697829716193664e-05, "loss": 1.0424, "step": 2122 }, { "epoch": 0.3121485021135821, "grad_norm": 0.5768373012542725, "learning_rate": 5.863105175292154e-05, "loss": 1.2547, "step": 2123 }, { "epoch": 0.3122955339092079, "grad_norm": 0.4633745849132538, "learning_rate": 5.856427378964942e-05, "loss": 1.241, "step": 2124 }, { "epoch": 0.3124425657048337, "grad_norm": 0.6544762253761292, "learning_rate": 5.84974958263773e-05, "loss": 1.1777, "step": 2125 }, { "epoch": 0.31258959750045945, "grad_norm": 0.6211328506469727, "learning_rate": 5.8430717863105176e-05, "loss": 1.0539, "step": 2126 }, { "epoch": 0.31273662929608526, "grad_norm": 0.6268273591995239, "learning_rate": 5.8363939899833056e-05, "loss": 1.3174, "step": 2127 }, { "epoch": 0.31288366109171106, "grad_norm": 0.501369833946228, "learning_rate": 5.829716193656094e-05, "loss": 1.0784, "step": 2128 }, { "epoch": 0.31303069288733687, "grad_norm": 0.5850099325180054, "learning_rate": 5.8230383973288816e-05, "loss": 1.6382, "step": 2129 }, { "epoch": 0.3131777246829627, "grad_norm": 0.49083012342453003, "learning_rate": 5.81636060100167e-05, "loss": 0.9565, "step": 2130 }, { "epoch": 0.3133247564785885, "grad_norm": 0.7709200382232666, "learning_rate": 5.8096828046744576e-05, "loss": 1.0898, "step": 2131 }, { "epoch": 0.3134717882742143, "grad_norm": 0.5617043972015381, "learning_rate": 5.8030050083472456e-05, "loss": 1.0424, "step": 2132 }, { "epoch": 0.3136188200698401, "grad_norm": 0.476923406124115, "learning_rate": 5.796327212020034e-05, "loss": 1.1124, "step": 2133 }, { "epoch": 0.3137658518654659, "grad_norm": 0.546636164188385, "learning_rate": 5.7896494156928216e-05, "loss": 1.1691, "step": 2134 }, { "epoch": 0.3139128836610917, "grad_norm": 0.48682162165641785, "learning_rate": 5.782971619365609e-05, "loss": 1.3057, "step": 2135 }, { "epoch": 0.3140599154567175, "grad_norm": 0.6608622074127197, "learning_rate": 5.7762938230383976e-05, "loss": 1.0331, "step": 2136 }, { "epoch": 0.3142069472523433, "grad_norm": 0.446187824010849, "learning_rate": 5.7696160267111856e-05, "loss": 1.5996, "step": 2137 }, { "epoch": 0.31435397904796913, "grad_norm": 0.5622944235801697, "learning_rate": 5.762938230383974e-05, "loss": 1.0326, "step": 2138 }, { "epoch": 0.31450101084359494, "grad_norm": 0.5662122368812561, "learning_rate": 5.7562604340567616e-05, "loss": 1.5063, "step": 2139 }, { "epoch": 0.31464804263922075, "grad_norm": 0.44566118717193604, "learning_rate": 5.749582637729549e-05, "loss": 1.3098, "step": 2140 }, { "epoch": 0.31479507443484656, "grad_norm": 0.6201196908950806, "learning_rate": 5.7429048414023375e-05, "loss": 1.0215, "step": 2141 }, { "epoch": 0.31494210623047236, "grad_norm": 0.7116391658782959, "learning_rate": 5.7362270450751255e-05, "loss": 0.8962, "step": 2142 }, { "epoch": 0.31508913802609817, "grad_norm": 0.617010772228241, "learning_rate": 5.729549248747913e-05, "loss": 1.3872, "step": 2143 }, { "epoch": 0.3152361698217239, "grad_norm": 0.5955065488815308, "learning_rate": 5.7228714524207015e-05, "loss": 1.1913, "step": 2144 }, { "epoch": 0.31538320161734973, "grad_norm": 0.5372576713562012, "learning_rate": 5.7161936560934895e-05, "loss": 1.1918, "step": 2145 }, { "epoch": 0.31553023341297554, "grad_norm": 0.7438437938690186, "learning_rate": 5.709515859766278e-05, "loss": 1.0361, "step": 2146 }, { "epoch": 0.31567726520860134, "grad_norm": 0.6139705777168274, "learning_rate": 5.7028380634390655e-05, "loss": 0.9817, "step": 2147 }, { "epoch": 0.31582429700422715, "grad_norm": 0.7500253319740295, "learning_rate": 5.696160267111853e-05, "loss": 0.951, "step": 2148 }, { "epoch": 0.31597132879985296, "grad_norm": 0.6581917405128479, "learning_rate": 5.6894824707846415e-05, "loss": 1.4421, "step": 2149 }, { "epoch": 0.31611836059547876, "grad_norm": 0.4528997540473938, "learning_rate": 5.6828046744574295e-05, "loss": 1.3725, "step": 2150 }, { "epoch": 0.31626539239110457, "grad_norm": 0.3959624767303467, "learning_rate": 5.676126878130217e-05, "loss": 1.4206, "step": 2151 }, { "epoch": 0.3164124241867304, "grad_norm": 0.6195608973503113, "learning_rate": 5.6694490818030055e-05, "loss": 1.2833, "step": 2152 }, { "epoch": 0.3165594559823562, "grad_norm": 0.4735466539859772, "learning_rate": 5.662771285475793e-05, "loss": 1.3277, "step": 2153 }, { "epoch": 0.316706487777982, "grad_norm": 0.8019950985908508, "learning_rate": 5.6560934891485815e-05, "loss": 0.9335, "step": 2154 }, { "epoch": 0.3168535195736078, "grad_norm": 0.6185794472694397, "learning_rate": 5.6494156928213694e-05, "loss": 1.3516, "step": 2155 }, { "epoch": 0.3170005513692336, "grad_norm": 0.45999693870544434, "learning_rate": 5.642737896494157e-05, "loss": 0.9986, "step": 2156 }, { "epoch": 0.3171475831648594, "grad_norm": 0.7296234965324402, "learning_rate": 5.6360601001669454e-05, "loss": 1.3268, "step": 2157 }, { "epoch": 0.3172946149604852, "grad_norm": 0.632175087928772, "learning_rate": 5.629382303839733e-05, "loss": 0.9652, "step": 2158 }, { "epoch": 0.31744164675611103, "grad_norm": 0.6801307797431946, "learning_rate": 5.622704507512521e-05, "loss": 1.2704, "step": 2159 }, { "epoch": 0.31758867855173684, "grad_norm": 0.5996660590171814, "learning_rate": 5.6160267111853094e-05, "loss": 1.4063, "step": 2160 }, { "epoch": 0.31773571034736264, "grad_norm": 0.6683328151702881, "learning_rate": 5.609348914858097e-05, "loss": 1.2544, "step": 2161 }, { "epoch": 0.3178827421429884, "grad_norm": 0.6162706017494202, "learning_rate": 5.6026711185308854e-05, "loss": 1.1964, "step": 2162 }, { "epoch": 0.3180297739386142, "grad_norm": 0.6166456937789917, "learning_rate": 5.5959933222036734e-05, "loss": 1.083, "step": 2163 }, { "epoch": 0.31817680573424, "grad_norm": 0.7254467010498047, "learning_rate": 5.589315525876461e-05, "loss": 1.1415, "step": 2164 }, { "epoch": 0.3183238375298658, "grad_norm": 0.559477686882019, "learning_rate": 5.5826377295492494e-05, "loss": 1.1049, "step": 2165 }, { "epoch": 0.3184708693254916, "grad_norm": 0.6183822154998779, "learning_rate": 5.575959933222037e-05, "loss": 1.2181, "step": 2166 }, { "epoch": 0.31861790112111743, "grad_norm": 0.6057455539703369, "learning_rate": 5.569282136894825e-05, "loss": 0.8795, "step": 2167 }, { "epoch": 0.31876493291674324, "grad_norm": 0.6708826422691345, "learning_rate": 5.5626043405676134e-05, "loss": 1.073, "step": 2168 }, { "epoch": 0.31891196471236904, "grad_norm": 0.6185042262077332, "learning_rate": 5.555926544240401e-05, "loss": 0.9732, "step": 2169 }, { "epoch": 0.31905899650799485, "grad_norm": 0.5562993288040161, "learning_rate": 5.5492487479131893e-05, "loss": 1.14, "step": 2170 }, { "epoch": 0.31920602830362066, "grad_norm": 0.6460797786712646, "learning_rate": 5.5425709515859767e-05, "loss": 1.1526, "step": 2171 }, { "epoch": 0.31935306009924647, "grad_norm": 0.7484979629516602, "learning_rate": 5.5358931552587646e-05, "loss": 1.18, "step": 2172 }, { "epoch": 0.3195000918948723, "grad_norm": 0.5642260909080505, "learning_rate": 5.529215358931553e-05, "loss": 0.9063, "step": 2173 }, { "epoch": 0.3196471236904981, "grad_norm": 0.3813646137714386, "learning_rate": 5.5225375626043406e-05, "loss": 1.5806, "step": 2174 }, { "epoch": 0.3197941554861239, "grad_norm": 0.5458641648292542, "learning_rate": 5.5158597662771286e-05, "loss": 0.9211, "step": 2175 }, { "epoch": 0.3199411872817497, "grad_norm": 0.48768460750579834, "learning_rate": 5.509181969949917e-05, "loss": 1.191, "step": 2176 }, { "epoch": 0.3200882190773755, "grad_norm": 0.7325701117515564, "learning_rate": 5.5025041736227046e-05, "loss": 0.9427, "step": 2177 }, { "epoch": 0.3202352508730013, "grad_norm": 0.564491868019104, "learning_rate": 5.495826377295493e-05, "loss": 0.8592, "step": 2178 }, { "epoch": 0.3203822826686271, "grad_norm": 0.39212480187416077, "learning_rate": 5.4891485809682806e-05, "loss": 1.4281, "step": 2179 }, { "epoch": 0.32052931446425287, "grad_norm": 0.7304385304450989, "learning_rate": 5.4824707846410686e-05, "loss": 0.8782, "step": 2180 }, { "epoch": 0.3206763462598787, "grad_norm": 0.44199448823928833, "learning_rate": 5.475792988313857e-05, "loss": 1.1129, "step": 2181 }, { "epoch": 0.3208233780555045, "grad_norm": 0.6550561785697937, "learning_rate": 5.4691151919866446e-05, "loss": 0.9916, "step": 2182 }, { "epoch": 0.3209704098511303, "grad_norm": 0.5354378819465637, "learning_rate": 5.462437395659432e-05, "loss": 0.9334, "step": 2183 }, { "epoch": 0.3211174416467561, "grad_norm": 0.5270412564277649, "learning_rate": 5.4557595993322206e-05, "loss": 0.8894, "step": 2184 }, { "epoch": 0.3212644734423819, "grad_norm": 0.6694762706756592, "learning_rate": 5.4490818030050086e-05, "loss": 0.8685, "step": 2185 }, { "epoch": 0.3214115052380077, "grad_norm": 0.5273659229278564, "learning_rate": 5.442404006677797e-05, "loss": 1.0873, "step": 2186 }, { "epoch": 0.3215585370336335, "grad_norm": 0.7224035263061523, "learning_rate": 5.4357262103505845e-05, "loss": 1.3377, "step": 2187 }, { "epoch": 0.3217055688292593, "grad_norm": 0.4616312086582184, "learning_rate": 5.429048414023372e-05, "loss": 1.2002, "step": 2188 }, { "epoch": 0.32185260062488513, "grad_norm": 0.5747209191322327, "learning_rate": 5.4223706176961605e-05, "loss": 0.9819, "step": 2189 }, { "epoch": 0.32199963242051094, "grad_norm": 0.635442316532135, "learning_rate": 5.4156928213689485e-05, "loss": 0.9196, "step": 2190 }, { "epoch": 0.32214666421613675, "grad_norm": 0.6477551460266113, "learning_rate": 5.409015025041736e-05, "loss": 0.9458, "step": 2191 }, { "epoch": 0.32229369601176255, "grad_norm": 0.4628916084766388, "learning_rate": 5.4023372287145245e-05, "loss": 1.5047, "step": 2192 }, { "epoch": 0.32244072780738836, "grad_norm": 0.7402610778808594, "learning_rate": 5.3956594323873125e-05, "loss": 1.1224, "step": 2193 }, { "epoch": 0.32258775960301417, "grad_norm": 0.8228488564491272, "learning_rate": 5.388981636060101e-05, "loss": 1.0426, "step": 2194 }, { "epoch": 0.32273479139864, "grad_norm": 0.6321390271186829, "learning_rate": 5.3823038397328885e-05, "loss": 1.1965, "step": 2195 }, { "epoch": 0.3228818231942658, "grad_norm": 0.6398823261260986, "learning_rate": 5.375626043405676e-05, "loss": 1.0363, "step": 2196 }, { "epoch": 0.3230288549898916, "grad_norm": 0.5515342950820923, "learning_rate": 5.3689482470784645e-05, "loss": 0.9711, "step": 2197 }, { "epoch": 0.32317588678551734, "grad_norm": 0.6629409193992615, "learning_rate": 5.3622704507512525e-05, "loss": 0.6706, "step": 2198 }, { "epoch": 0.32332291858114315, "grad_norm": 0.5805704593658447, "learning_rate": 5.35559265442404e-05, "loss": 1.2778, "step": 2199 }, { "epoch": 0.32346995037676896, "grad_norm": 0.7290553450584412, "learning_rate": 5.3489148580968285e-05, "loss": 1.4579, "step": 2200 }, { "epoch": 0.32361698217239476, "grad_norm": 0.44673237204551697, "learning_rate": 5.342237061769616e-05, "loss": 1.0192, "step": 2201 }, { "epoch": 0.32376401396802057, "grad_norm": 0.5224797129631042, "learning_rate": 5.3355592654424044e-05, "loss": 1.1086, "step": 2202 }, { "epoch": 0.3239110457636464, "grad_norm": 0.48255455493927, "learning_rate": 5.3288814691151924e-05, "loss": 1.214, "step": 2203 }, { "epoch": 0.3240580775592722, "grad_norm": 0.5036904215812683, "learning_rate": 5.32220367278798e-05, "loss": 1.0661, "step": 2204 }, { "epoch": 0.324205109354898, "grad_norm": 0.6137999296188354, "learning_rate": 5.3155258764607684e-05, "loss": 1.3786, "step": 2205 }, { "epoch": 0.3243521411505238, "grad_norm": 0.6228717565536499, "learning_rate": 5.308848080133556e-05, "loss": 1.2306, "step": 2206 }, { "epoch": 0.3244991729461496, "grad_norm": 0.48210886120796204, "learning_rate": 5.302170283806344e-05, "loss": 1.2736, "step": 2207 }, { "epoch": 0.3246462047417754, "grad_norm": 0.43646901845932007, "learning_rate": 5.2954924874791324e-05, "loss": 1.4311, "step": 2208 }, { "epoch": 0.3247932365374012, "grad_norm": 0.47882890701293945, "learning_rate": 5.28881469115192e-05, "loss": 1.1354, "step": 2209 }, { "epoch": 0.324940268333027, "grad_norm": 0.5947877168655396, "learning_rate": 5.2821368948247084e-05, "loss": 1.2609, "step": 2210 }, { "epoch": 0.32508730012865283, "grad_norm": 0.4554944932460785, "learning_rate": 5.2754590984974964e-05, "loss": 1.6018, "step": 2211 }, { "epoch": 0.32523433192427864, "grad_norm": 0.6398006081581116, "learning_rate": 5.268781302170284e-05, "loss": 1.0233, "step": 2212 }, { "epoch": 0.32538136371990445, "grad_norm": 0.7313606142997742, "learning_rate": 5.2621035058430724e-05, "loss": 0.8505, "step": 2213 }, { "epoch": 0.32552839551553026, "grad_norm": 0.4371834099292755, "learning_rate": 5.25542570951586e-05, "loss": 1.2695, "step": 2214 }, { "epoch": 0.32567542731115606, "grad_norm": 0.6192652583122253, "learning_rate": 5.248747913188648e-05, "loss": 1.1808, "step": 2215 }, { "epoch": 0.3258224591067818, "grad_norm": 0.4705529808998108, "learning_rate": 5.2420701168614363e-05, "loss": 1.5185, "step": 2216 }, { "epoch": 0.3259694909024076, "grad_norm": 0.5240978598594666, "learning_rate": 5.2353923205342237e-05, "loss": 1.3689, "step": 2217 }, { "epoch": 0.32611652269803343, "grad_norm": 0.8479958772659302, "learning_rate": 5.228714524207012e-05, "loss": 1.1046, "step": 2218 }, { "epoch": 0.32626355449365924, "grad_norm": 0.615420937538147, "learning_rate": 5.2220367278797996e-05, "loss": 0.9756, "step": 2219 }, { "epoch": 0.32641058628928504, "grad_norm": 0.7287018299102783, "learning_rate": 5.2153589315525876e-05, "loss": 0.9756, "step": 2220 }, { "epoch": 0.32655761808491085, "grad_norm": 0.6227064728736877, "learning_rate": 5.208681135225376e-05, "loss": 0.9688, "step": 2221 }, { "epoch": 0.32670464988053666, "grad_norm": 0.7073501944541931, "learning_rate": 5.2020033388981636e-05, "loss": 1.003, "step": 2222 }, { "epoch": 0.32685168167616246, "grad_norm": 0.4845917224884033, "learning_rate": 5.195325542570952e-05, "loss": 1.601, "step": 2223 }, { "epoch": 0.32699871347178827, "grad_norm": 0.5899642705917358, "learning_rate": 5.18864774624374e-05, "loss": 1.208, "step": 2224 }, { "epoch": 0.3271457452674141, "grad_norm": 0.6486482620239258, "learning_rate": 5.1819699499165276e-05, "loss": 0.9497, "step": 2225 }, { "epoch": 0.3272927770630399, "grad_norm": 0.6132608652114868, "learning_rate": 5.175292153589316e-05, "loss": 1.4674, "step": 2226 }, { "epoch": 0.3274398088586657, "grad_norm": 0.7314250469207764, "learning_rate": 5.1686143572621036e-05, "loss": 1.0815, "step": 2227 }, { "epoch": 0.3275868406542915, "grad_norm": 0.6710649132728577, "learning_rate": 5.1619365609348916e-05, "loss": 1.2955, "step": 2228 }, { "epoch": 0.3277338724499173, "grad_norm": 0.6601440906524658, "learning_rate": 5.15525876460768e-05, "loss": 1.1983, "step": 2229 }, { "epoch": 0.3278809042455431, "grad_norm": 0.5792474746704102, "learning_rate": 5.1485809682804676e-05, "loss": 1.164, "step": 2230 }, { "epoch": 0.3280279360411689, "grad_norm": 0.5168903470039368, "learning_rate": 5.141903171953256e-05, "loss": 1.3148, "step": 2231 }, { "epoch": 0.32817496783679473, "grad_norm": 0.8182685971260071, "learning_rate": 5.1352253756260436e-05, "loss": 1.0484, "step": 2232 }, { "epoch": 0.32832199963242054, "grad_norm": 0.4211324453353882, "learning_rate": 5.1285475792988315e-05, "loss": 1.5043, "step": 2233 }, { "epoch": 0.3284690314280463, "grad_norm": 0.6285020709037781, "learning_rate": 5.12186978297162e-05, "loss": 0.8636, "step": 2234 }, { "epoch": 0.3286160632236721, "grad_norm": 0.7970768809318542, "learning_rate": 5.1151919866444075e-05, "loss": 1.1073, "step": 2235 }, { "epoch": 0.3287630950192979, "grad_norm": 0.7587257027626038, "learning_rate": 5.108514190317195e-05, "loss": 1.0829, "step": 2236 }, { "epoch": 0.3289101268149237, "grad_norm": 0.6595321297645569, "learning_rate": 5.1018363939899835e-05, "loss": 1.346, "step": 2237 }, { "epoch": 0.3290571586105495, "grad_norm": 0.594467043876648, "learning_rate": 5.0951585976627715e-05, "loss": 1.0484, "step": 2238 }, { "epoch": 0.3292041904061753, "grad_norm": 0.5548723936080933, "learning_rate": 5.08848080133556e-05, "loss": 1.1618, "step": 2239 }, { "epoch": 0.32935122220180113, "grad_norm": 0.5987635254859924, "learning_rate": 5.0818030050083475e-05, "loss": 1.2209, "step": 2240 }, { "epoch": 0.32949825399742694, "grad_norm": 0.4925394654273987, "learning_rate": 5.0751252086811355e-05, "loss": 0.9649, "step": 2241 }, { "epoch": 0.32964528579305274, "grad_norm": 0.6406082510948181, "learning_rate": 5.068447412353924e-05, "loss": 1.2276, "step": 2242 }, { "epoch": 0.32979231758867855, "grad_norm": 0.42717477679252625, "learning_rate": 5.0617696160267115e-05, "loss": 1.4325, "step": 2243 }, { "epoch": 0.32993934938430436, "grad_norm": 0.4234490692615509, "learning_rate": 5.055091819699499e-05, "loss": 1.3257, "step": 2244 }, { "epoch": 0.33008638117993017, "grad_norm": 0.512763261795044, "learning_rate": 5.0484140233722875e-05, "loss": 1.5259, "step": 2245 }, { "epoch": 0.330233412975556, "grad_norm": 0.6326480507850647, "learning_rate": 5.0417362270450755e-05, "loss": 1.273, "step": 2246 }, { "epoch": 0.3303804447711818, "grad_norm": 0.37408584356307983, "learning_rate": 5.035058430717864e-05, "loss": 1.1135, "step": 2247 }, { "epoch": 0.3305274765668076, "grad_norm": 0.5869410037994385, "learning_rate": 5.0283806343906514e-05, "loss": 1.4136, "step": 2248 }, { "epoch": 0.3306745083624334, "grad_norm": 0.6604022979736328, "learning_rate": 5.021702838063439e-05, "loss": 0.9795, "step": 2249 }, { "epoch": 0.3308215401580592, "grad_norm": 0.4989512860774994, "learning_rate": 5.0150250417362274e-05, "loss": 1.2307, "step": 2250 }, { "epoch": 0.330968571953685, "grad_norm": 0.6117305159568787, "learning_rate": 5.0083472454090154e-05, "loss": 0.9597, "step": 2251 }, { "epoch": 0.3311156037493108, "grad_norm": 0.7301452159881592, "learning_rate": 5.001669449081803e-05, "loss": 1.0423, "step": 2252 }, { "epoch": 0.33126263554493657, "grad_norm": 0.40505483746528625, "learning_rate": 4.9949916527545914e-05, "loss": 1.6845, "step": 2253 }, { "epoch": 0.3314096673405624, "grad_norm": 0.5915031433105469, "learning_rate": 4.988313856427379e-05, "loss": 1.1743, "step": 2254 }, { "epoch": 0.3315566991361882, "grad_norm": 0.3573418855667114, "learning_rate": 4.9816360601001674e-05, "loss": 1.4216, "step": 2255 }, { "epoch": 0.331703730931814, "grad_norm": 0.6454871296882629, "learning_rate": 4.9749582637729554e-05, "loss": 0.9562, "step": 2256 }, { "epoch": 0.3318507627274398, "grad_norm": 0.6152846813201904, "learning_rate": 4.9682804674457434e-05, "loss": 1.1665, "step": 2257 }, { "epoch": 0.3319977945230656, "grad_norm": 0.6603919863700867, "learning_rate": 4.961602671118531e-05, "loss": 1.1278, "step": 2258 }, { "epoch": 0.3321448263186914, "grad_norm": 0.6449651122093201, "learning_rate": 4.9549248747913194e-05, "loss": 1.202, "step": 2259 }, { "epoch": 0.3322918581143172, "grad_norm": 0.689803957939148, "learning_rate": 4.9482470784641074e-05, "loss": 0.9214, "step": 2260 }, { "epoch": 0.332438889909943, "grad_norm": 0.5782380104064941, "learning_rate": 4.9415692821368953e-05, "loss": 1.2389, "step": 2261 }, { "epoch": 0.33258592170556883, "grad_norm": 0.5726940631866455, "learning_rate": 4.934891485809683e-05, "loss": 1.1424, "step": 2262 }, { "epoch": 0.33273295350119464, "grad_norm": 0.5231810808181763, "learning_rate": 4.9282136894824707e-05, "loss": 1.3518, "step": 2263 }, { "epoch": 0.33287998529682045, "grad_norm": 0.7004649043083191, "learning_rate": 4.921535893155259e-05, "loss": 0.997, "step": 2264 }, { "epoch": 0.33302701709244625, "grad_norm": 0.5009940266609192, "learning_rate": 4.914858096828047e-05, "loss": 1.0904, "step": 2265 }, { "epoch": 0.33317404888807206, "grad_norm": 0.5874700546264648, "learning_rate": 4.9081803005008346e-05, "loss": 1.2298, "step": 2266 }, { "epoch": 0.33332108068369787, "grad_norm": 0.39963170886039734, "learning_rate": 4.9015025041736226e-05, "loss": 1.4473, "step": 2267 }, { "epoch": 0.3334681124793237, "grad_norm": 0.8343802094459534, "learning_rate": 4.894824707846411e-05, "loss": 1.2995, "step": 2268 }, { "epoch": 0.3336151442749495, "grad_norm": 0.6021217703819275, "learning_rate": 4.888146911519199e-05, "loss": 1.0515, "step": 2269 }, { "epoch": 0.3337621760705753, "grad_norm": 0.6401078701019287, "learning_rate": 4.8814691151919866e-05, "loss": 1.1511, "step": 2270 }, { "epoch": 0.33390920786620104, "grad_norm": 0.4249172806739807, "learning_rate": 4.8747913188647746e-05, "loss": 1.5458, "step": 2271 }, { "epoch": 0.33405623966182685, "grad_norm": 0.6070518493652344, "learning_rate": 4.8681135225375626e-05, "loss": 1.1071, "step": 2272 }, { "epoch": 0.33420327145745266, "grad_norm": 0.5734437108039856, "learning_rate": 4.861435726210351e-05, "loss": 1.4452, "step": 2273 }, { "epoch": 0.33435030325307846, "grad_norm": 0.49365636706352234, "learning_rate": 4.8547579298831386e-05, "loss": 1.0006, "step": 2274 }, { "epoch": 0.33449733504870427, "grad_norm": 0.7907768487930298, "learning_rate": 4.8480801335559266e-05, "loss": 1.1057, "step": 2275 }, { "epoch": 0.3346443668443301, "grad_norm": 0.7318785190582275, "learning_rate": 4.8414023372287146e-05, "loss": 0.8933, "step": 2276 }, { "epoch": 0.3347913986399559, "grad_norm": 0.39243656396865845, "learning_rate": 4.834724540901503e-05, "loss": 1.0931, "step": 2277 }, { "epoch": 0.3349384304355817, "grad_norm": 0.5255695581436157, "learning_rate": 4.8280467445742906e-05, "loss": 1.6447, "step": 2278 }, { "epoch": 0.3350854622312075, "grad_norm": 0.5996083617210388, "learning_rate": 4.8213689482470785e-05, "loss": 1.2734, "step": 2279 }, { "epoch": 0.3352324940268333, "grad_norm": 0.6361013650894165, "learning_rate": 4.8146911519198665e-05, "loss": 1.2941, "step": 2280 }, { "epoch": 0.3353795258224591, "grad_norm": 0.4729306101799011, "learning_rate": 4.8080133555926545e-05, "loss": 1.2779, "step": 2281 }, { "epoch": 0.3355265576180849, "grad_norm": 0.6513959765434265, "learning_rate": 4.8013355592654425e-05, "loss": 1.3192, "step": 2282 }, { "epoch": 0.3356735894137107, "grad_norm": 0.7217071056365967, "learning_rate": 4.7946577629382305e-05, "loss": 0.9922, "step": 2283 }, { "epoch": 0.33582062120933653, "grad_norm": 0.7658651471138, "learning_rate": 4.7879799666110185e-05, "loss": 0.9025, "step": 2284 }, { "epoch": 0.33596765300496234, "grad_norm": 0.5754466652870178, "learning_rate": 4.7813021702838065e-05, "loss": 0.8889, "step": 2285 }, { "epoch": 0.33611468480058815, "grad_norm": 0.8149365186691284, "learning_rate": 4.7746243739565945e-05, "loss": 0.9384, "step": 2286 }, { "epoch": 0.33626171659621396, "grad_norm": 0.744154155254364, "learning_rate": 4.7679465776293825e-05, "loss": 0.9686, "step": 2287 }, { "epoch": 0.33640874839183976, "grad_norm": 0.6511445641517639, "learning_rate": 4.7612687813021705e-05, "loss": 1.3204, "step": 2288 }, { "epoch": 0.3365557801874655, "grad_norm": 0.5096464157104492, "learning_rate": 4.7545909849749585e-05, "loss": 1.0897, "step": 2289 }, { "epoch": 0.3367028119830913, "grad_norm": 0.6455485820770264, "learning_rate": 4.7479131886477465e-05, "loss": 1.1029, "step": 2290 }, { "epoch": 0.33684984377871713, "grad_norm": 0.644174337387085, "learning_rate": 4.7412353923205345e-05, "loss": 1.2865, "step": 2291 }, { "epoch": 0.33699687557434294, "grad_norm": 0.5884138345718384, "learning_rate": 4.7345575959933225e-05, "loss": 1.1283, "step": 2292 }, { "epoch": 0.33714390736996874, "grad_norm": 0.4165154695510864, "learning_rate": 4.7278797996661104e-05, "loss": 3.8881, "step": 2293 }, { "epoch": 0.33729093916559455, "grad_norm": 0.5498266816139221, "learning_rate": 4.7212020033388984e-05, "loss": 1.0039, "step": 2294 }, { "epoch": 0.33743797096122036, "grad_norm": 0.6789774894714355, "learning_rate": 4.7145242070116864e-05, "loss": 1.4324, "step": 2295 }, { "epoch": 0.33758500275684616, "grad_norm": 0.6630209684371948, "learning_rate": 4.7078464106844744e-05, "loss": 1.0282, "step": 2296 }, { "epoch": 0.33773203455247197, "grad_norm": 0.5246013402938843, "learning_rate": 4.7011686143572624e-05, "loss": 0.9495, "step": 2297 }, { "epoch": 0.3378790663480978, "grad_norm": 0.5729802846908569, "learning_rate": 4.6944908180300504e-05, "loss": 1.1237, "step": 2298 }, { "epoch": 0.3380260981437236, "grad_norm": 0.43020233511924744, "learning_rate": 4.6878130217028384e-05, "loss": 1.0887, "step": 2299 }, { "epoch": 0.3381731299393494, "grad_norm": 0.5093066096305847, "learning_rate": 4.6811352253756264e-05, "loss": 0.9949, "step": 2300 }, { "epoch": 0.3383201617349752, "grad_norm": 0.3662153482437134, "learning_rate": 4.6744574290484144e-05, "loss": 1.3645, "step": 2301 }, { "epoch": 0.338467193530601, "grad_norm": 0.5394424796104431, "learning_rate": 4.667779632721202e-05, "loss": 1.1641, "step": 2302 }, { "epoch": 0.3386142253262268, "grad_norm": 0.8383854031562805, "learning_rate": 4.6611018363939904e-05, "loss": 1.0179, "step": 2303 }, { "epoch": 0.3387612571218526, "grad_norm": 0.6382398009300232, "learning_rate": 4.6544240400667784e-05, "loss": 0.9838, "step": 2304 }, { "epoch": 0.33890828891747843, "grad_norm": 0.9111227989196777, "learning_rate": 4.6477462437395664e-05, "loss": 0.9317, "step": 2305 }, { "epoch": 0.33905532071310424, "grad_norm": 0.7547916173934937, "learning_rate": 4.641068447412354e-05, "loss": 1.6748, "step": 2306 }, { "epoch": 0.33920235250873, "grad_norm": 0.647483766078949, "learning_rate": 4.6343906510851423e-05, "loss": 1.0603, "step": 2307 }, { "epoch": 0.3393493843043558, "grad_norm": 0.47946277260780334, "learning_rate": 4.6277128547579303e-05, "loss": 1.4908, "step": 2308 }, { "epoch": 0.3394964160999816, "grad_norm": 0.7027813196182251, "learning_rate": 4.621035058430718e-05, "loss": 1.3098, "step": 2309 }, { "epoch": 0.3396434478956074, "grad_norm": 0.4709910750389099, "learning_rate": 4.6143572621035056e-05, "loss": 1.1122, "step": 2310 }, { "epoch": 0.3397904796912332, "grad_norm": 0.7265318036079407, "learning_rate": 4.6076794657762936e-05, "loss": 1.0021, "step": 2311 }, { "epoch": 0.339937511486859, "grad_norm": 0.5767427086830139, "learning_rate": 4.601001669449082e-05, "loss": 0.833, "step": 2312 }, { "epoch": 0.34008454328248483, "grad_norm": 0.6273320913314819, "learning_rate": 4.59432387312187e-05, "loss": 1.0621, "step": 2313 }, { "epoch": 0.34023157507811064, "grad_norm": 0.5055856704711914, "learning_rate": 4.5876460767946576e-05, "loss": 1.487, "step": 2314 }, { "epoch": 0.34037860687373644, "grad_norm": 0.49255913496017456, "learning_rate": 4.5809682804674456e-05, "loss": 1.0421, "step": 2315 }, { "epoch": 0.34052563866936225, "grad_norm": 0.669850766658783, "learning_rate": 4.574290484140234e-05, "loss": 1.3224, "step": 2316 }, { "epoch": 0.34067267046498806, "grad_norm": 0.5544292330741882, "learning_rate": 4.567612687813022e-05, "loss": 1.1047, "step": 2317 }, { "epoch": 0.34081970226061387, "grad_norm": 0.5823951959609985, "learning_rate": 4.5609348914858096e-05, "loss": 1.1394, "step": 2318 }, { "epoch": 0.3409667340562397, "grad_norm": 0.5695863962173462, "learning_rate": 4.5542570951585976e-05, "loss": 1.0523, "step": 2319 }, { "epoch": 0.3411137658518655, "grad_norm": 0.580834686756134, "learning_rate": 4.5475792988313856e-05, "loss": 1.3744, "step": 2320 }, { "epoch": 0.3412607976474913, "grad_norm": 0.4268816411495209, "learning_rate": 4.540901502504174e-05, "loss": 1.0924, "step": 2321 }, { "epoch": 0.3414078294431171, "grad_norm": 0.6674205660820007, "learning_rate": 4.5342237061769616e-05, "loss": 0.9592, "step": 2322 }, { "epoch": 0.3415548612387429, "grad_norm": 0.6535343527793884, "learning_rate": 4.5275459098497496e-05, "loss": 1.4539, "step": 2323 }, { "epoch": 0.3417018930343687, "grad_norm": 0.5609108805656433, "learning_rate": 4.5208681135225376e-05, "loss": 1.3799, "step": 2324 }, { "epoch": 0.34184892482999446, "grad_norm": 0.48847395181655884, "learning_rate": 4.514190317195326e-05, "loss": 1.1108, "step": 2325 }, { "epoch": 0.34199595662562027, "grad_norm": 0.6830761432647705, "learning_rate": 4.5075125208681135e-05, "loss": 1.145, "step": 2326 }, { "epoch": 0.3421429884212461, "grad_norm": 0.593635618686676, "learning_rate": 4.5008347245409015e-05, "loss": 1.3417, "step": 2327 }, { "epoch": 0.3422900202168719, "grad_norm": 0.641505777835846, "learning_rate": 4.4941569282136895e-05, "loss": 0.9054, "step": 2328 }, { "epoch": 0.3424370520124977, "grad_norm": 0.6394991874694824, "learning_rate": 4.4874791318864775e-05, "loss": 1.1233, "step": 2329 }, { "epoch": 0.3425840838081235, "grad_norm": 0.47683000564575195, "learning_rate": 4.4808013355592655e-05, "loss": 1.5755, "step": 2330 }, { "epoch": 0.3427311156037493, "grad_norm": 0.686940610408783, "learning_rate": 4.4741235392320535e-05, "loss": 1.0557, "step": 2331 }, { "epoch": 0.3428781473993751, "grad_norm": 0.7226173877716064, "learning_rate": 4.4674457429048415e-05, "loss": 1.1157, "step": 2332 }, { "epoch": 0.3430251791950009, "grad_norm": 0.7403520345687866, "learning_rate": 4.4607679465776295e-05, "loss": 0.9835, "step": 2333 }, { "epoch": 0.3431722109906267, "grad_norm": 0.5846675634384155, "learning_rate": 4.4540901502504175e-05, "loss": 1.3073, "step": 2334 }, { "epoch": 0.34331924278625253, "grad_norm": 0.5107977986335754, "learning_rate": 4.4474123539232055e-05, "loss": 0.8999, "step": 2335 }, { "epoch": 0.34346627458187834, "grad_norm": 0.6047511100769043, "learning_rate": 4.4407345575959935e-05, "loss": 1.0848, "step": 2336 }, { "epoch": 0.34361330637750415, "grad_norm": 0.4467674493789673, "learning_rate": 4.4340567612687815e-05, "loss": 1.1833, "step": 2337 }, { "epoch": 0.34376033817312995, "grad_norm": 0.5157157778739929, "learning_rate": 4.4273789649415695e-05, "loss": 1.345, "step": 2338 }, { "epoch": 0.34390736996875576, "grad_norm": 0.7002535462379456, "learning_rate": 4.4207011686143574e-05, "loss": 0.9835, "step": 2339 }, { "epoch": 0.34405440176438157, "grad_norm": 0.5245396494865417, "learning_rate": 4.4140233722871454e-05, "loss": 1.3377, "step": 2340 }, { "epoch": 0.3442014335600074, "grad_norm": 0.6940892338752747, "learning_rate": 4.4073455759599334e-05, "loss": 1.0251, "step": 2341 }, { "epoch": 0.3443484653556332, "grad_norm": 0.46222448348999023, "learning_rate": 4.4006677796327214e-05, "loss": 0.8865, "step": 2342 }, { "epoch": 0.34449549715125893, "grad_norm": 0.480270117521286, "learning_rate": 4.3939899833055094e-05, "loss": 1.1615, "step": 2343 }, { "epoch": 0.34464252894688474, "grad_norm": 0.54289311170578, "learning_rate": 4.3873121869782974e-05, "loss": 1.2131, "step": 2344 }, { "epoch": 0.34478956074251055, "grad_norm": 0.653423011302948, "learning_rate": 4.3806343906510854e-05, "loss": 1.3846, "step": 2345 }, { "epoch": 0.34493659253813636, "grad_norm": 0.5071634650230408, "learning_rate": 4.373956594323873e-05, "loss": 1.8782, "step": 2346 }, { "epoch": 0.34508362433376216, "grad_norm": 0.6017564535140991, "learning_rate": 4.3672787979966614e-05, "loss": 1.0623, "step": 2347 }, { "epoch": 0.34523065612938797, "grad_norm": 0.5143142938613892, "learning_rate": 4.3606010016694494e-05, "loss": 1.2412, "step": 2348 }, { "epoch": 0.3453776879250138, "grad_norm": 0.6182072758674622, "learning_rate": 4.3539232053422374e-05, "loss": 1.0744, "step": 2349 }, { "epoch": 0.3455247197206396, "grad_norm": 0.46266692876815796, "learning_rate": 4.3472454090150254e-05, "loss": 1.7368, "step": 2350 }, { "epoch": 0.3456717515162654, "grad_norm": 0.5848096609115601, "learning_rate": 4.3405676126878134e-05, "loss": 0.8464, "step": 2351 }, { "epoch": 0.3458187833118912, "grad_norm": 0.6598659753799438, "learning_rate": 4.3338898163606014e-05, "loss": 1.103, "step": 2352 }, { "epoch": 0.345965815107517, "grad_norm": 0.4610240161418915, "learning_rate": 4.3272120200333893e-05, "loss": 0.8795, "step": 2353 }, { "epoch": 0.3461128469031428, "grad_norm": 0.650132954120636, "learning_rate": 4.3205342237061773e-05, "loss": 0.8927, "step": 2354 }, { "epoch": 0.3462598786987686, "grad_norm": 0.6615062355995178, "learning_rate": 4.313856427378965e-05, "loss": 1.2509, "step": 2355 }, { "epoch": 0.3464069104943944, "grad_norm": 0.6247631311416626, "learning_rate": 4.307178631051753e-05, "loss": 1.3906, "step": 2356 }, { "epoch": 0.34655394229002023, "grad_norm": 0.5597137808799744, "learning_rate": 4.300500834724541e-05, "loss": 0.9937, "step": 2357 }, { "epoch": 0.34670097408564604, "grad_norm": 0.7856898903846741, "learning_rate": 4.293823038397329e-05, "loss": 0.9241, "step": 2358 }, { "epoch": 0.34684800588127185, "grad_norm": 0.5129521489143372, "learning_rate": 4.2871452420701166e-05, "loss": 1.0746, "step": 2359 }, { "epoch": 0.34699503767689766, "grad_norm": 0.42778074741363525, "learning_rate": 4.280467445742905e-05, "loss": 1.1416, "step": 2360 }, { "epoch": 0.3471420694725234, "grad_norm": 0.531898021697998, "learning_rate": 4.273789649415693e-05, "loss": 1.2406, "step": 2361 }, { "epoch": 0.3472891012681492, "grad_norm": 0.7625805139541626, "learning_rate": 4.267111853088481e-05, "loss": 1.0331, "step": 2362 }, { "epoch": 0.347436133063775, "grad_norm": 0.5260778069496155, "learning_rate": 4.2604340567612686e-05, "loss": 0.7939, "step": 2363 }, { "epoch": 0.34758316485940083, "grad_norm": 0.6454848051071167, "learning_rate": 4.253756260434057e-05, "loss": 1.0481, "step": 2364 }, { "epoch": 0.34773019665502664, "grad_norm": 0.4890654683113098, "learning_rate": 4.247078464106845e-05, "loss": 0.939, "step": 2365 }, { "epoch": 0.34787722845065244, "grad_norm": 0.6658653616905212, "learning_rate": 4.240400667779633e-05, "loss": 1.1672, "step": 2366 }, { "epoch": 0.34802426024627825, "grad_norm": 0.4238223731517792, "learning_rate": 4.2337228714524206e-05, "loss": 1.5498, "step": 2367 }, { "epoch": 0.34817129204190406, "grad_norm": 0.8019214868545532, "learning_rate": 4.2270450751252086e-05, "loss": 1.0367, "step": 2368 }, { "epoch": 0.34831832383752986, "grad_norm": 0.5940684676170349, "learning_rate": 4.220367278797997e-05, "loss": 1.0914, "step": 2369 }, { "epoch": 0.34846535563315567, "grad_norm": 0.5725755095481873, "learning_rate": 4.213689482470785e-05, "loss": 1.1792, "step": 2370 }, { "epoch": 0.3486123874287815, "grad_norm": 0.5893556475639343, "learning_rate": 4.2070116861435725e-05, "loss": 1.0876, "step": 2371 }, { "epoch": 0.3487594192244073, "grad_norm": 0.4181709885597229, "learning_rate": 4.2003338898163605e-05, "loss": 1.1739, "step": 2372 }, { "epoch": 0.3489064510200331, "grad_norm": 0.7324404120445251, "learning_rate": 4.193656093489149e-05, "loss": 1.0204, "step": 2373 }, { "epoch": 0.3490534828156589, "grad_norm": 0.49994581937789917, "learning_rate": 4.186978297161937e-05, "loss": 1.4631, "step": 2374 }, { "epoch": 0.3492005146112847, "grad_norm": 0.5485938787460327, "learning_rate": 4.1803005008347245e-05, "loss": 0.9379, "step": 2375 }, { "epoch": 0.3493475464069105, "grad_norm": 0.4848518669605255, "learning_rate": 4.1736227045075125e-05, "loss": 1.5529, "step": 2376 }, { "epoch": 0.3494945782025363, "grad_norm": 0.6775643825531006, "learning_rate": 4.1669449081803005e-05, "loss": 1.0242, "step": 2377 }, { "epoch": 0.34964160999816213, "grad_norm": 0.4051017761230469, "learning_rate": 4.160267111853089e-05, "loss": 1.0552, "step": 2378 }, { "epoch": 0.3497886417937879, "grad_norm": 0.5027542114257812, "learning_rate": 4.1535893155258765e-05, "loss": 1.2447, "step": 2379 }, { "epoch": 0.3499356735894137, "grad_norm": 0.646805465221405, "learning_rate": 4.1469115191986645e-05, "loss": 0.996, "step": 2380 }, { "epoch": 0.3500827053850395, "grad_norm": 0.7610476016998291, "learning_rate": 4.1402337228714525e-05, "loss": 0.8904, "step": 2381 }, { "epoch": 0.3502297371806653, "grad_norm": 0.5126630067825317, "learning_rate": 4.133555926544241e-05, "loss": 1.8088, "step": 2382 }, { "epoch": 0.3503767689762911, "grad_norm": 0.668276309967041, "learning_rate": 4.1268781302170285e-05, "loss": 1.0753, "step": 2383 }, { "epoch": 0.3505238007719169, "grad_norm": 0.6127064824104309, "learning_rate": 4.1202003338898165e-05, "loss": 1.2315, "step": 2384 }, { "epoch": 0.3506708325675427, "grad_norm": 0.527677595615387, "learning_rate": 4.1135225375626044e-05, "loss": 1.1333, "step": 2385 }, { "epoch": 0.35081786436316853, "grad_norm": 0.5830051302909851, "learning_rate": 4.1068447412353924e-05, "loss": 1.1935, "step": 2386 }, { "epoch": 0.35096489615879434, "grad_norm": 0.5479134917259216, "learning_rate": 4.1001669449081804e-05, "loss": 1.3208, "step": 2387 }, { "epoch": 0.35111192795442014, "grad_norm": 0.5065488815307617, "learning_rate": 4.0934891485809684e-05, "loss": 1.3829, "step": 2388 }, { "epoch": 0.35125895975004595, "grad_norm": 0.41520965099334717, "learning_rate": 4.0868113522537564e-05, "loss": 1.4562, "step": 2389 }, { "epoch": 0.35140599154567176, "grad_norm": 0.46565890312194824, "learning_rate": 4.0801335559265444e-05, "loss": 1.1664, "step": 2390 }, { "epoch": 0.35155302334129757, "grad_norm": 0.5291383862495422, "learning_rate": 4.0734557595993324e-05, "loss": 1.2867, "step": 2391 }, { "epoch": 0.3517000551369234, "grad_norm": 0.6068446636199951, "learning_rate": 4.0667779632721204e-05, "loss": 1.0823, "step": 2392 }, { "epoch": 0.3518470869325492, "grad_norm": 0.3464789092540741, "learning_rate": 4.0601001669449084e-05, "loss": 1.7238, "step": 2393 }, { "epoch": 0.351994118728175, "grad_norm": 0.6590052843093872, "learning_rate": 4.0534223706176964e-05, "loss": 1.1294, "step": 2394 }, { "epoch": 0.3521411505238008, "grad_norm": 0.8543641567230225, "learning_rate": 4.0467445742904844e-05, "loss": 1.2818, "step": 2395 }, { "epoch": 0.3522881823194266, "grad_norm": 0.5236414670944214, "learning_rate": 4.0400667779632724e-05, "loss": 1.6964, "step": 2396 }, { "epoch": 0.35243521411505235, "grad_norm": 0.5849925875663757, "learning_rate": 4.0333889816360604e-05, "loss": 1.1266, "step": 2397 }, { "epoch": 0.35258224591067816, "grad_norm": 0.5377421975135803, "learning_rate": 4.0267111853088484e-05, "loss": 1.0706, "step": 2398 }, { "epoch": 0.35272927770630397, "grad_norm": 0.6394080519676208, "learning_rate": 4.0200333889816363e-05, "loss": 1.1925, "step": 2399 }, { "epoch": 0.3528763095019298, "grad_norm": 0.6307380199432373, "learning_rate": 4.0133555926544243e-05, "loss": 1.4735, "step": 2400 }, { "epoch": 0.3530233412975556, "grad_norm": 0.5713698863983154, "learning_rate": 4.006677796327212e-05, "loss": 0.9661, "step": 2401 }, { "epoch": 0.3531703730931814, "grad_norm": 0.5154237747192383, "learning_rate": 4e-05, "loss": 1.2448, "step": 2402 }, { "epoch": 0.3533174048888072, "grad_norm": 0.7521777153015137, "learning_rate": 3.9933222036727876e-05, "loss": 1.2533, "step": 2403 }, { "epoch": 0.353464436684433, "grad_norm": 0.5829699039459229, "learning_rate": 3.986644407345576e-05, "loss": 1.3261, "step": 2404 }, { "epoch": 0.3536114684800588, "grad_norm": 0.2811475396156311, "learning_rate": 3.979966611018364e-05, "loss": 1.4352, "step": 2405 }, { "epoch": 0.3537585002756846, "grad_norm": 0.8883007764816284, "learning_rate": 3.973288814691152e-05, "loss": 1.4472, "step": 2406 }, { "epoch": 0.3539055320713104, "grad_norm": 0.5030226111412048, "learning_rate": 3.9666110183639396e-05, "loss": 1.3874, "step": 2407 }, { "epoch": 0.35405256386693623, "grad_norm": 0.4832488000392914, "learning_rate": 3.959933222036728e-05, "loss": 1.2992, "step": 2408 }, { "epoch": 0.35419959566256204, "grad_norm": 0.5988725423812866, "learning_rate": 3.953255425709516e-05, "loss": 1.0363, "step": 2409 }, { "epoch": 0.35434662745818785, "grad_norm": 0.59149569272995, "learning_rate": 3.946577629382304e-05, "loss": 1.2509, "step": 2410 }, { "epoch": 0.35449365925381365, "grad_norm": 0.5726426243782043, "learning_rate": 3.9398998330550916e-05, "loss": 1.2792, "step": 2411 }, { "epoch": 0.35464069104943946, "grad_norm": 0.4050772488117218, "learning_rate": 3.93322203672788e-05, "loss": 1.1615, "step": 2412 }, { "epoch": 0.35478772284506527, "grad_norm": 0.6124445796012878, "learning_rate": 3.926544240400668e-05, "loss": 1.2942, "step": 2413 }, { "epoch": 0.3549347546406911, "grad_norm": 0.5558871626853943, "learning_rate": 3.919866444073456e-05, "loss": 1.2151, "step": 2414 }, { "epoch": 0.3550817864363168, "grad_norm": 0.4722219407558441, "learning_rate": 3.9131886477462436e-05, "loss": 1.2328, "step": 2415 }, { "epoch": 0.35522881823194263, "grad_norm": 0.5244354605674744, "learning_rate": 3.9065108514190316e-05, "loss": 1.2152, "step": 2416 }, { "epoch": 0.35537585002756844, "grad_norm": 0.5670085549354553, "learning_rate": 3.89983305509182e-05, "loss": 1.1484, "step": 2417 }, { "epoch": 0.35552288182319425, "grad_norm": 0.5525314211845398, "learning_rate": 3.893155258764608e-05, "loss": 1.4999, "step": 2418 }, { "epoch": 0.35566991361882005, "grad_norm": 0.5421735048294067, "learning_rate": 3.8864774624373955e-05, "loss": 1.0378, "step": 2419 }, { "epoch": 0.35581694541444586, "grad_norm": 0.703663170337677, "learning_rate": 3.8797996661101835e-05, "loss": 0.9179, "step": 2420 }, { "epoch": 0.35596397721007167, "grad_norm": 0.6187422871589661, "learning_rate": 3.873121869782972e-05, "loss": 1.5995, "step": 2421 }, { "epoch": 0.3561110090056975, "grad_norm": 0.4639596939086914, "learning_rate": 3.86644407345576e-05, "loss": 1.3354, "step": 2422 }, { "epoch": 0.3562580408013233, "grad_norm": 0.6441057324409485, "learning_rate": 3.8597662771285475e-05, "loss": 1.1288, "step": 2423 }, { "epoch": 0.3564050725969491, "grad_norm": 0.6947935819625854, "learning_rate": 3.8530884808013355e-05, "loss": 1.2549, "step": 2424 }, { "epoch": 0.3565521043925749, "grad_norm": 0.5987950563430786, "learning_rate": 3.8464106844741235e-05, "loss": 1.1237, "step": 2425 }, { "epoch": 0.3566991361882007, "grad_norm": 0.6557531952857971, "learning_rate": 3.839732888146912e-05, "loss": 0.9596, "step": 2426 }, { "epoch": 0.3568461679838265, "grad_norm": 0.7100844383239746, "learning_rate": 3.8330550918196995e-05, "loss": 0.9543, "step": 2427 }, { "epoch": 0.3569931997794523, "grad_norm": 0.5723690390586853, "learning_rate": 3.8263772954924875e-05, "loss": 1.34, "step": 2428 }, { "epoch": 0.3571402315750781, "grad_norm": 0.6131441593170166, "learning_rate": 3.8196994991652755e-05, "loss": 1.0592, "step": 2429 }, { "epoch": 0.35728726337070393, "grad_norm": 0.46284812688827515, "learning_rate": 3.813021702838064e-05, "loss": 1.1438, "step": 2430 }, { "epoch": 0.35743429516632974, "grad_norm": 0.6052001714706421, "learning_rate": 3.8063439065108514e-05, "loss": 0.9973, "step": 2431 }, { "epoch": 0.35758132696195555, "grad_norm": 0.5532065629959106, "learning_rate": 3.7996661101836394e-05, "loss": 1.2369, "step": 2432 }, { "epoch": 0.3577283587575813, "grad_norm": 0.5707781314849854, "learning_rate": 3.7929883138564274e-05, "loss": 1.0799, "step": 2433 }, { "epoch": 0.3578753905532071, "grad_norm": 0.43618401885032654, "learning_rate": 3.7863105175292154e-05, "loss": 1.3067, "step": 2434 }, { "epoch": 0.3580224223488329, "grad_norm": 0.5196675658226013, "learning_rate": 3.7796327212020034e-05, "loss": 1.1269, "step": 2435 }, { "epoch": 0.3581694541444587, "grad_norm": 0.7638793587684631, "learning_rate": 3.7729549248747914e-05, "loss": 1.0902, "step": 2436 }, { "epoch": 0.35831648594008453, "grad_norm": 0.5897741913795471, "learning_rate": 3.7662771285475794e-05, "loss": 0.9561, "step": 2437 }, { "epoch": 0.35846351773571034, "grad_norm": 0.654977560043335, "learning_rate": 3.7595993322203674e-05, "loss": 1.1922, "step": 2438 }, { "epoch": 0.35861054953133614, "grad_norm": 0.4807330071926117, "learning_rate": 3.7529215358931554e-05, "loss": 1.6253, "step": 2439 }, { "epoch": 0.35875758132696195, "grad_norm": 0.4286400377750397, "learning_rate": 3.7462437395659434e-05, "loss": 1.661, "step": 2440 }, { "epoch": 0.35890461312258776, "grad_norm": 0.4760587513446808, "learning_rate": 3.7395659432387314e-05, "loss": 1.1488, "step": 2441 }, { "epoch": 0.35905164491821356, "grad_norm": 0.5634348392486572, "learning_rate": 3.7328881469115194e-05, "loss": 1.0405, "step": 2442 }, { "epoch": 0.35919867671383937, "grad_norm": 0.47320762276649475, "learning_rate": 3.7262103505843074e-05, "loss": 1.1608, "step": 2443 }, { "epoch": 0.3593457085094652, "grad_norm": 0.5028555989265442, "learning_rate": 3.7195325542570954e-05, "loss": 1.3969, "step": 2444 }, { "epoch": 0.359492740305091, "grad_norm": 0.4967988133430481, "learning_rate": 3.7128547579298833e-05, "loss": 1.0166, "step": 2445 }, { "epoch": 0.3596397721007168, "grad_norm": 0.6675366163253784, "learning_rate": 3.7061769616026713e-05, "loss": 1.166, "step": 2446 }, { "epoch": 0.3597868038963426, "grad_norm": 0.5802751183509827, "learning_rate": 3.699499165275459e-05, "loss": 1.1419, "step": 2447 }, { "epoch": 0.3599338356919684, "grad_norm": 0.4358658790588379, "learning_rate": 3.692821368948247e-05, "loss": 1.5554, "step": 2448 }, { "epoch": 0.3600808674875942, "grad_norm": 0.6354566812515259, "learning_rate": 3.686143572621035e-05, "loss": 1.1317, "step": 2449 }, { "epoch": 0.36022789928322, "grad_norm": 0.7282038331031799, "learning_rate": 3.679465776293823e-05, "loss": 1.0641, "step": 2450 }, { "epoch": 0.3603749310788458, "grad_norm": 0.6139276623725891, "learning_rate": 3.6727879799666106e-05, "loss": 1.1066, "step": 2451 }, { "epoch": 0.3605219628744716, "grad_norm": 0.5198124051094055, "learning_rate": 3.666110183639399e-05, "loss": 1.1393, "step": 2452 }, { "epoch": 0.3606689946700974, "grad_norm": 0.6176633834838867, "learning_rate": 3.659432387312187e-05, "loss": 1.2572, "step": 2453 }, { "epoch": 0.3608160264657232, "grad_norm": 0.4903654158115387, "learning_rate": 3.652754590984975e-05, "loss": 1.4956, "step": 2454 }, { "epoch": 0.360963058261349, "grad_norm": 0.4969039261341095, "learning_rate": 3.6460767946577626e-05, "loss": 1.2653, "step": 2455 }, { "epoch": 0.3611100900569748, "grad_norm": 0.716075599193573, "learning_rate": 3.639398998330551e-05, "loss": 1.1018, "step": 2456 }, { "epoch": 0.3612571218526006, "grad_norm": 0.5166922211647034, "learning_rate": 3.632721202003339e-05, "loss": 1.2694, "step": 2457 }, { "epoch": 0.3614041536482264, "grad_norm": 0.6394582986831665, "learning_rate": 3.626043405676127e-05, "loss": 1.1454, "step": 2458 }, { "epoch": 0.36155118544385223, "grad_norm": 0.4211079776287079, "learning_rate": 3.6193656093489146e-05, "loss": 1.5955, "step": 2459 }, { "epoch": 0.36169821723947804, "grad_norm": 0.48486754298210144, "learning_rate": 3.6126878130217026e-05, "loss": 1.0035, "step": 2460 }, { "epoch": 0.36184524903510384, "grad_norm": 0.4770890772342682, "learning_rate": 3.606010016694491e-05, "loss": 1.7147, "step": 2461 }, { "epoch": 0.36199228083072965, "grad_norm": 0.5580724477767944, "learning_rate": 3.599332220367279e-05, "loss": 1.0998, "step": 2462 }, { "epoch": 0.36213931262635546, "grad_norm": 0.8095806241035461, "learning_rate": 3.5926544240400665e-05, "loss": 0.9229, "step": 2463 }, { "epoch": 0.36228634442198127, "grad_norm": 0.5576737523078918, "learning_rate": 3.5859766277128545e-05, "loss": 1.6486, "step": 2464 }, { "epoch": 0.3624333762176071, "grad_norm": 0.4984843134880066, "learning_rate": 3.579298831385643e-05, "loss": 1.7552, "step": 2465 }, { "epoch": 0.3625804080132329, "grad_norm": 0.579366147518158, "learning_rate": 3.572621035058431e-05, "loss": 0.8157, "step": 2466 }, { "epoch": 0.3627274398088587, "grad_norm": 0.6493086218833923, "learning_rate": 3.5659432387312185e-05, "loss": 1.163, "step": 2467 }, { "epoch": 0.3628744716044845, "grad_norm": 0.6782344579696655, "learning_rate": 3.5592654424040065e-05, "loss": 1.059, "step": 2468 }, { "epoch": 0.3630215034001103, "grad_norm": 0.5620684027671814, "learning_rate": 3.5525876460767945e-05, "loss": 1.0418, "step": 2469 }, { "epoch": 0.36316853519573605, "grad_norm": 0.48011258244514465, "learning_rate": 3.545909849749583e-05, "loss": 1.1465, "step": 2470 }, { "epoch": 0.36331556699136186, "grad_norm": 0.4881964325904846, "learning_rate": 3.5392320534223705e-05, "loss": 1.6634, "step": 2471 }, { "epoch": 0.36346259878698767, "grad_norm": 0.5458020567893982, "learning_rate": 3.5325542570951585e-05, "loss": 0.851, "step": 2472 }, { "epoch": 0.3636096305826135, "grad_norm": 0.6420631408691406, "learning_rate": 3.5258764607679465e-05, "loss": 1.0542, "step": 2473 }, { "epoch": 0.3637566623782393, "grad_norm": 0.7362111210823059, "learning_rate": 3.519198664440735e-05, "loss": 1.0582, "step": 2474 }, { "epoch": 0.3639036941738651, "grad_norm": 0.7367462515830994, "learning_rate": 3.5125208681135225e-05, "loss": 1.15, "step": 2475 }, { "epoch": 0.3640507259694909, "grad_norm": 0.5474201440811157, "learning_rate": 3.5058430717863105e-05, "loss": 1.3628, "step": 2476 }, { "epoch": 0.3641977577651167, "grad_norm": 0.606412410736084, "learning_rate": 3.4991652754590984e-05, "loss": 1.2116, "step": 2477 }, { "epoch": 0.3643447895607425, "grad_norm": 0.6438230872154236, "learning_rate": 3.492487479131887e-05, "loss": 1.2331, "step": 2478 }, { "epoch": 0.3644918213563683, "grad_norm": 0.5651243925094604, "learning_rate": 3.4858096828046744e-05, "loss": 1.1871, "step": 2479 }, { "epoch": 0.3646388531519941, "grad_norm": 0.6541581749916077, "learning_rate": 3.4791318864774624e-05, "loss": 1.5205, "step": 2480 }, { "epoch": 0.36478588494761993, "grad_norm": 0.5034081339836121, "learning_rate": 3.4724540901502504e-05, "loss": 1.746, "step": 2481 }, { "epoch": 0.36493291674324574, "grad_norm": 0.4837198257446289, "learning_rate": 3.4657762938230384e-05, "loss": 1.3116, "step": 2482 }, { "epoch": 0.36507994853887155, "grad_norm": 0.6728594899177551, "learning_rate": 3.459098497495827e-05, "loss": 1.3104, "step": 2483 }, { "epoch": 0.36522698033449735, "grad_norm": 0.5988984704017639, "learning_rate": 3.4524207011686144e-05, "loss": 1.1026, "step": 2484 }, { "epoch": 0.36537401213012316, "grad_norm": 0.5185652375221252, "learning_rate": 3.4457429048414024e-05, "loss": 1.6483, "step": 2485 }, { "epoch": 0.36552104392574897, "grad_norm": 0.5622418522834778, "learning_rate": 3.4390651085141904e-05, "loss": 1.3347, "step": 2486 }, { "epoch": 0.3656680757213748, "grad_norm": 0.6840404868125916, "learning_rate": 3.432387312186979e-05, "loss": 1.115, "step": 2487 }, { "epoch": 0.3658151075170005, "grad_norm": 0.44170692563056946, "learning_rate": 3.4257095158597664e-05, "loss": 1.6526, "step": 2488 }, { "epoch": 0.36596213931262633, "grad_norm": 0.5814111828804016, "learning_rate": 3.4190317195325544e-05, "loss": 1.4333, "step": 2489 }, { "epoch": 0.36610917110825214, "grad_norm": 0.6652354598045349, "learning_rate": 3.4123539232053424e-05, "loss": 1.217, "step": 2490 }, { "epoch": 0.36625620290387795, "grad_norm": 0.7346956133842468, "learning_rate": 3.4056761268781303e-05, "loss": 1.0988, "step": 2491 }, { "epoch": 0.36640323469950375, "grad_norm": 0.576501727104187, "learning_rate": 3.3989983305509183e-05, "loss": 1.2446, "step": 2492 }, { "epoch": 0.36655026649512956, "grad_norm": 0.6412355899810791, "learning_rate": 3.392320534223706e-05, "loss": 1.0251, "step": 2493 }, { "epoch": 0.36669729829075537, "grad_norm": 0.42551058530807495, "learning_rate": 3.385642737896494e-05, "loss": 1.4446, "step": 2494 }, { "epoch": 0.3668443300863812, "grad_norm": 0.5112267732620239, "learning_rate": 3.378964941569282e-05, "loss": 1.1298, "step": 2495 }, { "epoch": 0.366991361882007, "grad_norm": 0.5427221655845642, "learning_rate": 3.37228714524207e-05, "loss": 0.9828, "step": 2496 }, { "epoch": 0.3671383936776328, "grad_norm": 0.5151547789573669, "learning_rate": 3.365609348914858e-05, "loss": 1.6385, "step": 2497 }, { "epoch": 0.3672854254732586, "grad_norm": 0.5735944509506226, "learning_rate": 3.358931552587646e-05, "loss": 1.097, "step": 2498 }, { "epoch": 0.3674324572688844, "grad_norm": 0.7624590396881104, "learning_rate": 3.352253756260434e-05, "loss": 1.0872, "step": 2499 }, { "epoch": 0.3675794890645102, "grad_norm": 0.6254386901855469, "learning_rate": 3.345575959933222e-05, "loss": 1.0631, "step": 2500 }, { "epoch": 0.367726520860136, "grad_norm": 0.7482060790061951, "learning_rate": 3.33889816360601e-05, "loss": 0.989, "step": 2501 }, { "epoch": 0.3678735526557618, "grad_norm": 0.6068713665008545, "learning_rate": 3.332220367278798e-05, "loss": 1.0091, "step": 2502 }, { "epoch": 0.36802058445138763, "grad_norm": 0.7387110590934753, "learning_rate": 3.325542570951586e-05, "loss": 1.0568, "step": 2503 }, { "epoch": 0.36816761624701344, "grad_norm": 0.7153123021125793, "learning_rate": 3.318864774624374e-05, "loss": 1.0122, "step": 2504 }, { "epoch": 0.36831464804263925, "grad_norm": 0.4413795471191406, "learning_rate": 3.312186978297162e-05, "loss": 1.7322, "step": 2505 }, { "epoch": 0.368461679838265, "grad_norm": 0.561302900314331, "learning_rate": 3.30550918196995e-05, "loss": 1.0254, "step": 2506 }, { "epoch": 0.3686087116338908, "grad_norm": 0.5234289169311523, "learning_rate": 3.298831385642738e-05, "loss": 0.8333, "step": 2507 }, { "epoch": 0.3687557434295166, "grad_norm": 0.48700568079948425, "learning_rate": 3.2921535893155256e-05, "loss": 1.1918, "step": 2508 }, { "epoch": 0.3689027752251424, "grad_norm": 0.5648956298828125, "learning_rate": 3.285475792988314e-05, "loss": 1.004, "step": 2509 }, { "epoch": 0.36904980702076823, "grad_norm": 0.6668082475662231, "learning_rate": 3.278797996661102e-05, "loss": 1.3005, "step": 2510 }, { "epoch": 0.36919683881639404, "grad_norm": 0.5677913427352905, "learning_rate": 3.27212020033389e-05, "loss": 1.1484, "step": 2511 }, { "epoch": 0.36934387061201984, "grad_norm": 0.44198891520500183, "learning_rate": 3.2654424040066775e-05, "loss": 1.6651, "step": 2512 }, { "epoch": 0.36949090240764565, "grad_norm": 0.4747164845466614, "learning_rate": 3.258764607679466e-05, "loss": 0.7549, "step": 2513 }, { "epoch": 0.36963793420327146, "grad_norm": 0.6893665790557861, "learning_rate": 3.252086811352254e-05, "loss": 1.1817, "step": 2514 }, { "epoch": 0.36978496599889726, "grad_norm": 0.6216636300086975, "learning_rate": 3.245409015025042e-05, "loss": 1.1139, "step": 2515 }, { "epoch": 0.36993199779452307, "grad_norm": 0.5247299671173096, "learning_rate": 3.2387312186978295e-05, "loss": 1.1155, "step": 2516 }, { "epoch": 0.3700790295901489, "grad_norm": 0.5363318920135498, "learning_rate": 3.2320534223706175e-05, "loss": 1.2311, "step": 2517 }, { "epoch": 0.3702260613857747, "grad_norm": 0.5654115080833435, "learning_rate": 3.225375626043406e-05, "loss": 1.049, "step": 2518 }, { "epoch": 0.3703730931814005, "grad_norm": 0.5353726744651794, "learning_rate": 3.218697829716194e-05, "loss": 1.5818, "step": 2519 }, { "epoch": 0.3705201249770263, "grad_norm": 0.45176467299461365, "learning_rate": 3.2120200333889815e-05, "loss": 1.4358, "step": 2520 }, { "epoch": 0.3706671567726521, "grad_norm": 0.612365186214447, "learning_rate": 3.2053422370617695e-05, "loss": 1.4531, "step": 2521 }, { "epoch": 0.3708141885682779, "grad_norm": 0.49134448170661926, "learning_rate": 3.198664440734558e-05, "loss": 1.3209, "step": 2522 }, { "epoch": 0.3709612203639037, "grad_norm": 0.5698893070220947, "learning_rate": 3.191986644407346e-05, "loss": 1.4881, "step": 2523 }, { "epoch": 0.3711082521595295, "grad_norm": 0.560930609703064, "learning_rate": 3.1853088480801334e-05, "loss": 1.2343, "step": 2524 }, { "epoch": 0.3712552839551553, "grad_norm": 0.5703999996185303, "learning_rate": 3.1786310517529214e-05, "loss": 1.0973, "step": 2525 }, { "epoch": 0.3714023157507811, "grad_norm": 0.5993462204933167, "learning_rate": 3.1719532554257094e-05, "loss": 1.4354, "step": 2526 }, { "epoch": 0.3715493475464069, "grad_norm": 0.5028145909309387, "learning_rate": 3.165275459098498e-05, "loss": 1.1981, "step": 2527 }, { "epoch": 0.3716963793420327, "grad_norm": 0.6053719520568848, "learning_rate": 3.1585976627712854e-05, "loss": 1.3432, "step": 2528 }, { "epoch": 0.3718434111376585, "grad_norm": 0.7020232081413269, "learning_rate": 3.1519198664440734e-05, "loss": 1.2178, "step": 2529 }, { "epoch": 0.3719904429332843, "grad_norm": 0.48608487844467163, "learning_rate": 3.1452420701168614e-05, "loss": 1.2507, "step": 2530 }, { "epoch": 0.3721374747289101, "grad_norm": 0.7640485167503357, "learning_rate": 3.13856427378965e-05, "loss": 0.9809, "step": 2531 }, { "epoch": 0.37228450652453593, "grad_norm": 0.5647515654563904, "learning_rate": 3.1318864774624374e-05, "loss": 1.2285, "step": 2532 }, { "epoch": 0.37243153832016174, "grad_norm": 0.7145013213157654, "learning_rate": 3.1252086811352254e-05, "loss": 1.1685, "step": 2533 }, { "epoch": 0.37257857011578754, "grad_norm": 0.6330011487007141, "learning_rate": 3.1185308848080134e-05, "loss": 1.1367, "step": 2534 }, { "epoch": 0.37272560191141335, "grad_norm": 0.6759470701217651, "learning_rate": 3.111853088480802e-05, "loss": 1.2263, "step": 2535 }, { "epoch": 0.37287263370703916, "grad_norm": 0.5074057579040527, "learning_rate": 3.1051752921535894e-05, "loss": 1.3741, "step": 2536 }, { "epoch": 0.37301966550266497, "grad_norm": 0.4660629332065582, "learning_rate": 3.0984974958263773e-05, "loss": 1.3461, "step": 2537 }, { "epoch": 0.3731666972982908, "grad_norm": 0.633834719657898, "learning_rate": 3.0918196994991653e-05, "loss": 0.8176, "step": 2538 }, { "epoch": 0.3733137290939166, "grad_norm": 0.561944305896759, "learning_rate": 3.085141903171953e-05, "loss": 1.1604, "step": 2539 }, { "epoch": 0.3734607608895424, "grad_norm": 0.7749508023262024, "learning_rate": 3.078464106844741e-05, "loss": 0.9553, "step": 2540 }, { "epoch": 0.3736077926851682, "grad_norm": 0.6799429655075073, "learning_rate": 3.071786310517529e-05, "loss": 1.4185, "step": 2541 }, { "epoch": 0.37375482448079395, "grad_norm": 0.5075151920318604, "learning_rate": 3.065108514190317e-05, "loss": 1.5976, "step": 2542 }, { "epoch": 0.37390185627641975, "grad_norm": 0.511060357093811, "learning_rate": 3.058430717863105e-05, "loss": 1.5459, "step": 2543 }, { "epoch": 0.37404888807204556, "grad_norm": 0.48519209027290344, "learning_rate": 3.051752921535893e-05, "loss": 1.3092, "step": 2544 }, { "epoch": 0.37419591986767137, "grad_norm": 0.7925971746444702, "learning_rate": 3.0450751252086813e-05, "loss": 0.8563, "step": 2545 }, { "epoch": 0.3743429516632972, "grad_norm": 0.650205135345459, "learning_rate": 3.0383973288814693e-05, "loss": 0.7301, "step": 2546 }, { "epoch": 0.374489983458923, "grad_norm": 0.4130078852176666, "learning_rate": 3.0317195325542573e-05, "loss": 1.4261, "step": 2547 }, { "epoch": 0.3746370152545488, "grad_norm": 0.46834516525268555, "learning_rate": 3.025041736227045e-05, "loss": 1.6337, "step": 2548 }, { "epoch": 0.3747840470501746, "grad_norm": 0.46545398235321045, "learning_rate": 3.0183639398998333e-05, "loss": 1.001, "step": 2549 }, { "epoch": 0.3749310788458004, "grad_norm": 0.595981776714325, "learning_rate": 3.0116861435726213e-05, "loss": 1.4412, "step": 2550 }, { "epoch": 0.3750781106414262, "grad_norm": 0.643929660320282, "learning_rate": 3.0050083472454093e-05, "loss": 1.32, "step": 2551 }, { "epoch": 0.375225142437052, "grad_norm": 0.7144728302955627, "learning_rate": 2.998330550918197e-05, "loss": 0.709, "step": 2552 }, { "epoch": 0.3753721742326778, "grad_norm": 0.618415355682373, "learning_rate": 2.991652754590985e-05, "loss": 1.1494, "step": 2553 }, { "epoch": 0.37551920602830363, "grad_norm": 0.6865541934967041, "learning_rate": 2.9849749582637732e-05, "loss": 1.2944, "step": 2554 }, { "epoch": 0.37566623782392944, "grad_norm": 0.4899437725543976, "learning_rate": 2.9782971619365612e-05, "loss": 1.2752, "step": 2555 }, { "epoch": 0.37581326961955525, "grad_norm": 0.6752919554710388, "learning_rate": 2.971619365609349e-05, "loss": 1.0614, "step": 2556 }, { "epoch": 0.37596030141518105, "grad_norm": 0.6753167510032654, "learning_rate": 2.964941569282137e-05, "loss": 1.0514, "step": 2557 }, { "epoch": 0.37610733321080686, "grad_norm": 0.8470339775085449, "learning_rate": 2.9582637729549252e-05, "loss": 1.2903, "step": 2558 }, { "epoch": 0.37625436500643267, "grad_norm": 0.5819979906082153, "learning_rate": 2.9515859766277132e-05, "loss": 1.0305, "step": 2559 }, { "epoch": 0.3764013968020584, "grad_norm": 0.7408475875854492, "learning_rate": 2.944908180300501e-05, "loss": 1.0335, "step": 2560 }, { "epoch": 0.3765484285976842, "grad_norm": 0.5864561796188354, "learning_rate": 2.938230383973289e-05, "loss": 1.2501, "step": 2561 }, { "epoch": 0.37669546039331003, "grad_norm": 0.6343914866447449, "learning_rate": 2.931552587646077e-05, "loss": 1.3041, "step": 2562 }, { "epoch": 0.37684249218893584, "grad_norm": 0.49309438467025757, "learning_rate": 2.924874791318865e-05, "loss": 1.0109, "step": 2563 }, { "epoch": 0.37698952398456165, "grad_norm": 0.5442574620246887, "learning_rate": 2.9181969949916528e-05, "loss": 1.1846, "step": 2564 }, { "epoch": 0.37713655578018745, "grad_norm": 0.6601904630661011, "learning_rate": 2.9115191986644408e-05, "loss": 0.8258, "step": 2565 }, { "epoch": 0.37728358757581326, "grad_norm": 0.4675993025302887, "learning_rate": 2.9048414023372288e-05, "loss": 1.2621, "step": 2566 }, { "epoch": 0.37743061937143907, "grad_norm": 0.5504015684127808, "learning_rate": 2.898163606010017e-05, "loss": 1.0307, "step": 2567 }, { "epoch": 0.3775776511670649, "grad_norm": 0.5736517906188965, "learning_rate": 2.8914858096828045e-05, "loss": 1.4307, "step": 2568 }, { "epoch": 0.3777246829626907, "grad_norm": 0.7945086359977722, "learning_rate": 2.8848080133555928e-05, "loss": 0.9593, "step": 2569 }, { "epoch": 0.3778717147583165, "grad_norm": 0.4597846567630768, "learning_rate": 2.8781302170283808e-05, "loss": 1.4155, "step": 2570 }, { "epoch": 0.3780187465539423, "grad_norm": 0.6355583667755127, "learning_rate": 2.8714524207011688e-05, "loss": 1.1502, "step": 2571 }, { "epoch": 0.3781657783495681, "grad_norm": 0.59587562084198, "learning_rate": 2.8647746243739564e-05, "loss": 1.0115, "step": 2572 }, { "epoch": 0.3783128101451939, "grad_norm": 0.7364314198493958, "learning_rate": 2.8580968280467448e-05, "loss": 1.1463, "step": 2573 }, { "epoch": 0.3784598419408197, "grad_norm": 0.6068917512893677, "learning_rate": 2.8514190317195328e-05, "loss": 1.1081, "step": 2574 }, { "epoch": 0.3786068737364455, "grad_norm": 0.7285560369491577, "learning_rate": 2.8447412353923207e-05, "loss": 0.9509, "step": 2575 }, { "epoch": 0.37875390553207133, "grad_norm": 0.6040778756141663, "learning_rate": 2.8380634390651084e-05, "loss": 1.0781, "step": 2576 }, { "epoch": 0.37890093732769714, "grad_norm": 0.620225191116333, "learning_rate": 2.8313856427378964e-05, "loss": 1.2592, "step": 2577 }, { "epoch": 0.3790479691233229, "grad_norm": 0.49409401416778564, "learning_rate": 2.8247078464106847e-05, "loss": 1.5277, "step": 2578 }, { "epoch": 0.3791950009189487, "grad_norm": 0.46937716007232666, "learning_rate": 2.8180300500834727e-05, "loss": 1.0915, "step": 2579 }, { "epoch": 0.3793420327145745, "grad_norm": 0.42692017555236816, "learning_rate": 2.8113522537562604e-05, "loss": 1.2609, "step": 2580 }, { "epoch": 0.3794890645102003, "grad_norm": 0.5466404557228088, "learning_rate": 2.8046744574290484e-05, "loss": 1.1212, "step": 2581 }, { "epoch": 0.3796360963058261, "grad_norm": 0.5111925601959229, "learning_rate": 2.7979966611018367e-05, "loss": 1.6013, "step": 2582 }, { "epoch": 0.37978312810145193, "grad_norm": 0.6703363656997681, "learning_rate": 2.7913188647746247e-05, "loss": 1.1202, "step": 2583 }, { "epoch": 0.37993015989707773, "grad_norm": 0.44196370244026184, "learning_rate": 2.7846410684474123e-05, "loss": 1.0942, "step": 2584 }, { "epoch": 0.38007719169270354, "grad_norm": 0.4990256428718567, "learning_rate": 2.7779632721202003e-05, "loss": 1.2116, "step": 2585 }, { "epoch": 0.38022422348832935, "grad_norm": 0.545623779296875, "learning_rate": 2.7712854757929883e-05, "loss": 1.155, "step": 2586 }, { "epoch": 0.38037125528395516, "grad_norm": 0.39881432056427, "learning_rate": 2.7646076794657767e-05, "loss": 1.1195, "step": 2587 }, { "epoch": 0.38051828707958096, "grad_norm": 0.7441054582595825, "learning_rate": 2.7579298831385643e-05, "loss": 1.1657, "step": 2588 }, { "epoch": 0.38066531887520677, "grad_norm": 0.5764066576957703, "learning_rate": 2.7512520868113523e-05, "loss": 1.845, "step": 2589 }, { "epoch": 0.3808123506708326, "grad_norm": 0.6514137387275696, "learning_rate": 2.7445742904841403e-05, "loss": 1.5025, "step": 2590 }, { "epoch": 0.3809593824664584, "grad_norm": 0.5704215168952942, "learning_rate": 2.7378964941569286e-05, "loss": 1.2048, "step": 2591 }, { "epoch": 0.3811064142620842, "grad_norm": 0.816928505897522, "learning_rate": 2.731218697829716e-05, "loss": 1.0151, "step": 2592 }, { "epoch": 0.38125344605771, "grad_norm": 0.6569788455963135, "learning_rate": 2.7245409015025043e-05, "loss": 1.1019, "step": 2593 }, { "epoch": 0.3814004778533358, "grad_norm": 0.39248692989349365, "learning_rate": 2.7178631051752923e-05, "loss": 1.7261, "step": 2594 }, { "epoch": 0.3815475096489616, "grad_norm": 0.6365862488746643, "learning_rate": 2.7111853088480803e-05, "loss": 1.0442, "step": 2595 }, { "epoch": 0.38169454144458737, "grad_norm": 0.6925631761550903, "learning_rate": 2.704507512520868e-05, "loss": 1.0083, "step": 2596 }, { "epoch": 0.3818415732402132, "grad_norm": 0.6050065159797668, "learning_rate": 2.6978297161936563e-05, "loss": 1.406, "step": 2597 }, { "epoch": 0.381988605035839, "grad_norm": 0.45651134848594666, "learning_rate": 2.6911519198664442e-05, "loss": 1.5604, "step": 2598 }, { "epoch": 0.3821356368314648, "grad_norm": 0.5140454173088074, "learning_rate": 2.6844741235392322e-05, "loss": 1.0131, "step": 2599 }, { "epoch": 0.3822826686270906, "grad_norm": 0.4572305679321289, "learning_rate": 2.67779632721202e-05, "loss": 1.0411, "step": 2600 }, { "epoch": 0.3824297004227164, "grad_norm": 0.6158120632171631, "learning_rate": 2.671118530884808e-05, "loss": 1.1526, "step": 2601 }, { "epoch": 0.3825767322183422, "grad_norm": 0.6998158097267151, "learning_rate": 2.6644407345575962e-05, "loss": 1.0308, "step": 2602 }, { "epoch": 0.382723764013968, "grad_norm": 0.5746186375617981, "learning_rate": 2.6577629382303842e-05, "loss": 1.2022, "step": 2603 }, { "epoch": 0.3828707958095938, "grad_norm": 0.5606583952903748, "learning_rate": 2.651085141903172e-05, "loss": 0.8629, "step": 2604 }, { "epoch": 0.38301782760521963, "grad_norm": 0.7778172492980957, "learning_rate": 2.64440734557596e-05, "loss": 1.0901, "step": 2605 }, { "epoch": 0.38316485940084544, "grad_norm": 0.437592476606369, "learning_rate": 2.6377295492487482e-05, "loss": 1.5652, "step": 2606 }, { "epoch": 0.38331189119647124, "grad_norm": 0.4443962574005127, "learning_rate": 2.6310517529215362e-05, "loss": 1.2968, "step": 2607 }, { "epoch": 0.38345892299209705, "grad_norm": 0.6878659725189209, "learning_rate": 2.624373956594324e-05, "loss": 1.3779, "step": 2608 }, { "epoch": 0.38360595478772286, "grad_norm": 0.5246115922927856, "learning_rate": 2.6176961602671118e-05, "loss": 0.9663, "step": 2609 }, { "epoch": 0.38375298658334867, "grad_norm": 0.5096486210823059, "learning_rate": 2.6110183639398998e-05, "loss": 1.4445, "step": 2610 }, { "epoch": 0.3839000183789745, "grad_norm": 0.5698012709617615, "learning_rate": 2.604340567612688e-05, "loss": 1.1302, "step": 2611 }, { "epoch": 0.3840470501746003, "grad_norm": 0.5383695960044861, "learning_rate": 2.597662771285476e-05, "loss": 1.0055, "step": 2612 }, { "epoch": 0.3841940819702261, "grad_norm": 0.47102800011634827, "learning_rate": 2.5909849749582638e-05, "loss": 1.2204, "step": 2613 }, { "epoch": 0.38434111376585184, "grad_norm": 0.4215308725833893, "learning_rate": 2.5843071786310518e-05, "loss": 1.1038, "step": 2614 }, { "epoch": 0.38448814556147765, "grad_norm": 0.5904027819633484, "learning_rate": 2.57762938230384e-05, "loss": 1.1162, "step": 2615 }, { "epoch": 0.38463517735710345, "grad_norm": 0.6563110947608948, "learning_rate": 2.570951585976628e-05, "loss": 1.0526, "step": 2616 }, { "epoch": 0.38478220915272926, "grad_norm": 0.5478011965751648, "learning_rate": 2.5642737896494158e-05, "loss": 1.7771, "step": 2617 }, { "epoch": 0.38492924094835507, "grad_norm": 0.4837952256202698, "learning_rate": 2.5575959933222038e-05, "loss": 1.0878, "step": 2618 }, { "epoch": 0.3850762727439809, "grad_norm": 0.5275688171386719, "learning_rate": 2.5509181969949918e-05, "loss": 1.3595, "step": 2619 }, { "epoch": 0.3852233045396067, "grad_norm": 0.5213336944580078, "learning_rate": 2.54424040066778e-05, "loss": 1.2964, "step": 2620 }, { "epoch": 0.3853703363352325, "grad_norm": 0.5752866864204407, "learning_rate": 2.5375626043405677e-05, "loss": 1.3265, "step": 2621 }, { "epoch": 0.3855173681308583, "grad_norm": 0.7548462748527527, "learning_rate": 2.5308848080133557e-05, "loss": 1.0368, "step": 2622 }, { "epoch": 0.3856643999264841, "grad_norm": 0.5092149972915649, "learning_rate": 2.5242070116861437e-05, "loss": 1.6592, "step": 2623 }, { "epoch": 0.3858114317221099, "grad_norm": 0.6510732173919678, "learning_rate": 2.517529215358932e-05, "loss": 0.7118, "step": 2624 }, { "epoch": 0.3859584635177357, "grad_norm": 0.5841689109802246, "learning_rate": 2.5108514190317194e-05, "loss": 1.109, "step": 2625 }, { "epoch": 0.3861054953133615, "grad_norm": 0.6288565993309021, "learning_rate": 2.5041736227045077e-05, "loss": 1.1903, "step": 2626 }, { "epoch": 0.38625252710898733, "grad_norm": 0.7097330689430237, "learning_rate": 2.4974958263772957e-05, "loss": 1.0267, "step": 2627 }, { "epoch": 0.38639955890461314, "grad_norm": 0.5920519232749939, "learning_rate": 2.4908180300500837e-05, "loss": 1.2631, "step": 2628 }, { "epoch": 0.38654659070023895, "grad_norm": 0.5546897053718567, "learning_rate": 2.4841402337228717e-05, "loss": 1.2084, "step": 2629 }, { "epoch": 0.38669362249586475, "grad_norm": 0.586908757686615, "learning_rate": 2.4774624373956597e-05, "loss": 0.9745, "step": 2630 }, { "epoch": 0.38684065429149056, "grad_norm": 0.49706023931503296, "learning_rate": 2.4707846410684477e-05, "loss": 1.6573, "step": 2631 }, { "epoch": 0.3869876860871163, "grad_norm": 0.6251803040504456, "learning_rate": 2.4641068447412353e-05, "loss": 1.0859, "step": 2632 }, { "epoch": 0.3871347178827421, "grad_norm": 0.6121665239334106, "learning_rate": 2.4574290484140237e-05, "loss": 0.9258, "step": 2633 }, { "epoch": 0.3872817496783679, "grad_norm": 0.5381894111633301, "learning_rate": 2.4507512520868113e-05, "loss": 1.0761, "step": 2634 }, { "epoch": 0.38742878147399373, "grad_norm": 0.5811814069747925, "learning_rate": 2.4440734557595996e-05, "loss": 1.1556, "step": 2635 }, { "epoch": 0.38757581326961954, "grad_norm": 0.605916440486908, "learning_rate": 2.4373956594323873e-05, "loss": 1.0735, "step": 2636 }, { "epoch": 0.38772284506524535, "grad_norm": 0.6532436013221741, "learning_rate": 2.4307178631051756e-05, "loss": 1.1236, "step": 2637 }, { "epoch": 0.38786987686087115, "grad_norm": 0.6118401288986206, "learning_rate": 2.4240400667779633e-05, "loss": 1.3728, "step": 2638 }, { "epoch": 0.38801690865649696, "grad_norm": 0.4702865183353424, "learning_rate": 2.4173622704507516e-05, "loss": 1.5931, "step": 2639 }, { "epoch": 0.38816394045212277, "grad_norm": 0.6645123362541199, "learning_rate": 2.4106844741235393e-05, "loss": 1.1565, "step": 2640 }, { "epoch": 0.3883109722477486, "grad_norm": 0.41092148423194885, "learning_rate": 2.4040066777963273e-05, "loss": 1.4205, "step": 2641 }, { "epoch": 0.3884580040433744, "grad_norm": 0.45163190364837646, "learning_rate": 2.3973288814691153e-05, "loss": 1.219, "step": 2642 }, { "epoch": 0.3886050358390002, "grad_norm": 0.8611518740653992, "learning_rate": 2.3906510851419033e-05, "loss": 0.917, "step": 2643 }, { "epoch": 0.388752067634626, "grad_norm": 0.5938888192176819, "learning_rate": 2.3839732888146912e-05, "loss": 1.2542, "step": 2644 }, { "epoch": 0.3888990994302518, "grad_norm": 0.5167170166969299, "learning_rate": 2.3772954924874792e-05, "loss": 1.0662, "step": 2645 }, { "epoch": 0.3890461312258776, "grad_norm": 0.5540469288825989, "learning_rate": 2.3706176961602672e-05, "loss": 0.9709, "step": 2646 }, { "epoch": 0.3891931630215034, "grad_norm": 0.6746082305908203, "learning_rate": 2.3639398998330552e-05, "loss": 0.8981, "step": 2647 }, { "epoch": 0.3893401948171292, "grad_norm": 0.5011353492736816, "learning_rate": 2.3572621035058432e-05, "loss": 0.9276, "step": 2648 }, { "epoch": 0.38948722661275503, "grad_norm": 0.7849153280258179, "learning_rate": 2.3505843071786312e-05, "loss": 1.1191, "step": 2649 }, { "epoch": 0.3896342584083808, "grad_norm": 0.6192378997802734, "learning_rate": 2.3439065108514192e-05, "loss": 0.9913, "step": 2650 }, { "epoch": 0.3897812902040066, "grad_norm": 0.35990431904792786, "learning_rate": 2.3372287145242072e-05, "loss": 1.5151, "step": 2651 }, { "epoch": 0.3899283219996324, "grad_norm": 0.47933903336524963, "learning_rate": 2.3305509181969952e-05, "loss": 1.5803, "step": 2652 }, { "epoch": 0.3900753537952582, "grad_norm": 0.4537132978439331, "learning_rate": 2.3238731218697832e-05, "loss": 1.4787, "step": 2653 }, { "epoch": 0.390222385590884, "grad_norm": 0.6734927892684937, "learning_rate": 2.3171953255425712e-05, "loss": 1.1337, "step": 2654 }, { "epoch": 0.3903694173865098, "grad_norm": 0.519097626209259, "learning_rate": 2.310517529215359e-05, "loss": 1.2472, "step": 2655 }, { "epoch": 0.3905164491821356, "grad_norm": 0.7624889612197876, "learning_rate": 2.3038397328881468e-05, "loss": 1.1503, "step": 2656 }, { "epoch": 0.39066348097776143, "grad_norm": 0.6018010377883911, "learning_rate": 2.297161936560935e-05, "loss": 1.1852, "step": 2657 }, { "epoch": 0.39081051277338724, "grad_norm": 0.6985217928886414, "learning_rate": 2.2904841402337228e-05, "loss": 0.9835, "step": 2658 }, { "epoch": 0.39095754456901305, "grad_norm": 0.5729690790176392, "learning_rate": 2.283806343906511e-05, "loss": 1.126, "step": 2659 }, { "epoch": 0.39110457636463886, "grad_norm": 0.4897937476634979, "learning_rate": 2.2771285475792988e-05, "loss": 1.0229, "step": 2660 }, { "epoch": 0.39125160816026466, "grad_norm": 0.4304359555244446, "learning_rate": 2.270450751252087e-05, "loss": 1.4721, "step": 2661 }, { "epoch": 0.39139863995589047, "grad_norm": 0.5089052319526672, "learning_rate": 2.2637729549248748e-05, "loss": 1.3706, "step": 2662 }, { "epoch": 0.3915456717515163, "grad_norm": 0.6415994167327881, "learning_rate": 2.257095158597663e-05, "loss": 0.9358, "step": 2663 }, { "epoch": 0.3916927035471421, "grad_norm": 0.7515066266059875, "learning_rate": 2.2504173622704508e-05, "loss": 1.202, "step": 2664 }, { "epoch": 0.3918397353427679, "grad_norm": 0.5684996247291565, "learning_rate": 2.2437395659432388e-05, "loss": 1.053, "step": 2665 }, { "epoch": 0.3919867671383937, "grad_norm": 0.5868377089500427, "learning_rate": 2.2370617696160268e-05, "loss": 1.2348, "step": 2666 }, { "epoch": 0.3921337989340195, "grad_norm": 0.7246077656745911, "learning_rate": 2.2303839732888147e-05, "loss": 1.0436, "step": 2667 }, { "epoch": 0.3922808307296453, "grad_norm": 0.4642273187637329, "learning_rate": 2.2237061769616027e-05, "loss": 1.9199, "step": 2668 }, { "epoch": 0.39242786252527107, "grad_norm": 0.6841215491294861, "learning_rate": 2.2170283806343907e-05, "loss": 1.0471, "step": 2669 }, { "epoch": 0.39257489432089687, "grad_norm": 0.5944216251373291, "learning_rate": 2.2103505843071787e-05, "loss": 1.0243, "step": 2670 }, { "epoch": 0.3927219261165227, "grad_norm": 0.39563724398612976, "learning_rate": 2.2036727879799667e-05, "loss": 1.5137, "step": 2671 }, { "epoch": 0.3928689579121485, "grad_norm": 0.540519654750824, "learning_rate": 2.1969949916527547e-05, "loss": 1.2857, "step": 2672 }, { "epoch": 0.3930159897077743, "grad_norm": 0.5819935202598572, "learning_rate": 2.1903171953255427e-05, "loss": 1.0857, "step": 2673 }, { "epoch": 0.3931630215034001, "grad_norm": 0.6601287722587585, "learning_rate": 2.1836393989983307e-05, "loss": 1.1274, "step": 2674 }, { "epoch": 0.3933100532990259, "grad_norm": 0.7445135712623596, "learning_rate": 2.1769616026711187e-05, "loss": 0.964, "step": 2675 }, { "epoch": 0.3934570850946517, "grad_norm": 0.7423242330551147, "learning_rate": 2.1702838063439067e-05, "loss": 1.1171, "step": 2676 }, { "epoch": 0.3936041168902775, "grad_norm": 0.7027772068977356, "learning_rate": 2.1636060100166947e-05, "loss": 1.1859, "step": 2677 }, { "epoch": 0.39375114868590333, "grad_norm": 0.5325579643249512, "learning_rate": 2.1569282136894827e-05, "loss": 1.0586, "step": 2678 }, { "epoch": 0.39389818048152914, "grad_norm": 0.40690648555755615, "learning_rate": 2.1502504173622707e-05, "loss": 1.0014, "step": 2679 }, { "epoch": 0.39404521227715494, "grad_norm": 0.5756933093070984, "learning_rate": 2.1435726210350583e-05, "loss": 1.181, "step": 2680 }, { "epoch": 0.39419224407278075, "grad_norm": 0.7624319791793823, "learning_rate": 2.1368948247078466e-05, "loss": 1.0473, "step": 2681 }, { "epoch": 0.39433927586840656, "grad_norm": 0.6878528594970703, "learning_rate": 2.1302170283806343e-05, "loss": 1.0396, "step": 2682 }, { "epoch": 0.39448630766403237, "grad_norm": 0.687721848487854, "learning_rate": 2.1235392320534226e-05, "loss": 1.2451, "step": 2683 }, { "epoch": 0.3946333394596582, "grad_norm": 0.442137748003006, "learning_rate": 2.1168614357262103e-05, "loss": 1.5876, "step": 2684 }, { "epoch": 0.394780371255284, "grad_norm": 0.6539183259010315, "learning_rate": 2.1101836393989986e-05, "loss": 1.0104, "step": 2685 }, { "epoch": 0.3949274030509098, "grad_norm": 0.4435972571372986, "learning_rate": 2.1035058430717863e-05, "loss": 1.1721, "step": 2686 }, { "epoch": 0.39507443484653554, "grad_norm": 0.5175235867500305, "learning_rate": 2.0968280467445746e-05, "loss": 1.1384, "step": 2687 }, { "epoch": 0.39522146664216135, "grad_norm": 0.6143773198127747, "learning_rate": 2.0901502504173623e-05, "loss": 1.5196, "step": 2688 }, { "epoch": 0.39536849843778715, "grad_norm": 0.7459739446640015, "learning_rate": 2.0834724540901503e-05, "loss": 0.985, "step": 2689 }, { "epoch": 0.39551553023341296, "grad_norm": 0.5660704374313354, "learning_rate": 2.0767946577629382e-05, "loss": 1.1287, "step": 2690 }, { "epoch": 0.39566256202903877, "grad_norm": 0.6166510581970215, "learning_rate": 2.0701168614357262e-05, "loss": 1.56, "step": 2691 }, { "epoch": 0.3958095938246646, "grad_norm": 0.6144928932189941, "learning_rate": 2.0634390651085142e-05, "loss": 0.9611, "step": 2692 }, { "epoch": 0.3959566256202904, "grad_norm": 0.6552537679672241, "learning_rate": 2.0567612687813022e-05, "loss": 1.1133, "step": 2693 }, { "epoch": 0.3961036574159162, "grad_norm": 0.4505479037761688, "learning_rate": 2.0500834724540902e-05, "loss": 1.6448, "step": 2694 }, { "epoch": 0.396250689211542, "grad_norm": 0.5055540204048157, "learning_rate": 2.0434056761268782e-05, "loss": 1.7185, "step": 2695 }, { "epoch": 0.3963977210071678, "grad_norm": 0.5297315716743469, "learning_rate": 2.0367278797996662e-05, "loss": 1.1755, "step": 2696 }, { "epoch": 0.3965447528027936, "grad_norm": 0.5798683762550354, "learning_rate": 2.0300500834724542e-05, "loss": 1.5559, "step": 2697 }, { "epoch": 0.3966917845984194, "grad_norm": 0.5888775587081909, "learning_rate": 2.0233722871452422e-05, "loss": 1.0013, "step": 2698 }, { "epoch": 0.3968388163940452, "grad_norm": 0.8410176634788513, "learning_rate": 2.0166944908180302e-05, "loss": 1.0665, "step": 2699 }, { "epoch": 0.39698584818967103, "grad_norm": 0.49537351727485657, "learning_rate": 2.0100166944908182e-05, "loss": 1.2967, "step": 2700 }, { "epoch": 0.39713287998529684, "grad_norm": 1.0065873861312866, "learning_rate": 2.003338898163606e-05, "loss": 0.8915, "step": 2701 }, { "epoch": 0.39727991178092265, "grad_norm": 0.735388994216919, "learning_rate": 1.9966611018363938e-05, "loss": 1.0102, "step": 2702 }, { "epoch": 0.39742694357654845, "grad_norm": 0.6341103911399841, "learning_rate": 1.989983305509182e-05, "loss": 1.1895, "step": 2703 }, { "epoch": 0.39757397537217426, "grad_norm": 0.4727029800415039, "learning_rate": 1.9833055091819698e-05, "loss": 0.9063, "step": 2704 }, { "epoch": 0.3977210071678, "grad_norm": 0.6398822069168091, "learning_rate": 1.976627712854758e-05, "loss": 1.0773, "step": 2705 }, { "epoch": 0.3978680389634258, "grad_norm": 0.656646192073822, "learning_rate": 1.9699499165275458e-05, "loss": 1.2104, "step": 2706 }, { "epoch": 0.3980150707590516, "grad_norm": 0.592746913433075, "learning_rate": 1.963272120200334e-05, "loss": 1.1681, "step": 2707 }, { "epoch": 0.39816210255467743, "grad_norm": 0.7133629322052002, "learning_rate": 1.9565943238731218e-05, "loss": 1.0386, "step": 2708 }, { "epoch": 0.39830913435030324, "grad_norm": 0.6200928092002869, "learning_rate": 1.94991652754591e-05, "loss": 1.0771, "step": 2709 }, { "epoch": 0.39845616614592905, "grad_norm": 0.5848642587661743, "learning_rate": 1.9432387312186978e-05, "loss": 1.1156, "step": 2710 }, { "epoch": 0.39860319794155485, "grad_norm": 0.7517028450965881, "learning_rate": 1.936560934891486e-05, "loss": 1.0735, "step": 2711 }, { "epoch": 0.39875022973718066, "grad_norm": 0.7014345526695251, "learning_rate": 1.9298831385642738e-05, "loss": 0.8978, "step": 2712 }, { "epoch": 0.39889726153280647, "grad_norm": 0.5111197829246521, "learning_rate": 1.9232053422370617e-05, "loss": 1.5798, "step": 2713 }, { "epoch": 0.3990442933284323, "grad_norm": 0.6293010711669922, "learning_rate": 1.9165275459098497e-05, "loss": 0.9435, "step": 2714 }, { "epoch": 0.3991913251240581, "grad_norm": 0.5737072229385376, "learning_rate": 1.9098497495826377e-05, "loss": 1.0366, "step": 2715 }, { "epoch": 0.3993383569196839, "grad_norm": 0.611175537109375, "learning_rate": 1.9031719532554257e-05, "loss": 0.8937, "step": 2716 }, { "epoch": 0.3994853887153097, "grad_norm": 0.6260928511619568, "learning_rate": 1.8964941569282137e-05, "loss": 1.0528, "step": 2717 }, { "epoch": 0.3996324205109355, "grad_norm": 0.5334596037864685, "learning_rate": 1.8898163606010017e-05, "loss": 0.9363, "step": 2718 }, { "epoch": 0.3997794523065613, "grad_norm": 0.4904210865497589, "learning_rate": 1.8831385642737897e-05, "loss": 1.0532, "step": 2719 }, { "epoch": 0.3999264841021871, "grad_norm": 0.37613916397094727, "learning_rate": 1.8764607679465777e-05, "loss": 1.4129, "step": 2720 }, { "epoch": 0.4000735158978129, "grad_norm": 0.5409479737281799, "learning_rate": 1.8697829716193657e-05, "loss": 0.8383, "step": 2721 }, { "epoch": 0.40022054769343873, "grad_norm": 0.5622455477714539, "learning_rate": 1.8631051752921537e-05, "loss": 1.0704, "step": 2722 }, { "epoch": 0.4003675794890645, "grad_norm": 0.6025890707969666, "learning_rate": 1.8564273789649417e-05, "loss": 1.4453, "step": 2723 }, { "epoch": 0.4005146112846903, "grad_norm": 0.6276631951332092, "learning_rate": 1.8497495826377297e-05, "loss": 1.5396, "step": 2724 }, { "epoch": 0.4006616430803161, "grad_norm": 0.5277634859085083, "learning_rate": 1.8430717863105177e-05, "loss": 1.1517, "step": 2725 }, { "epoch": 0.4008086748759419, "grad_norm": 0.5223104953765869, "learning_rate": 1.8363939899833053e-05, "loss": 1.5344, "step": 2726 }, { "epoch": 0.4009557066715677, "grad_norm": 0.5764372944831848, "learning_rate": 1.8297161936560936e-05, "loss": 1.248, "step": 2727 }, { "epoch": 0.4011027384671935, "grad_norm": 0.8572928309440613, "learning_rate": 1.8230383973288813e-05, "loss": 1.3219, "step": 2728 }, { "epoch": 0.4012497702628193, "grad_norm": 0.4557867646217346, "learning_rate": 1.8163606010016696e-05, "loss": 1.255, "step": 2729 }, { "epoch": 0.40139680205844513, "grad_norm": 0.7781767249107361, "learning_rate": 1.8096828046744573e-05, "loss": 1.1074, "step": 2730 }, { "epoch": 0.40154383385407094, "grad_norm": 0.6317264437675476, "learning_rate": 1.8030050083472456e-05, "loss": 1.2319, "step": 2731 }, { "epoch": 0.40169086564969675, "grad_norm": 0.45465198159217834, "learning_rate": 1.7963272120200333e-05, "loss": 1.5427, "step": 2732 }, { "epoch": 0.40183789744532256, "grad_norm": 0.7173395156860352, "learning_rate": 1.7896494156928216e-05, "loss": 1.4283, "step": 2733 }, { "epoch": 0.40198492924094836, "grad_norm": 0.5120325684547424, "learning_rate": 1.7829716193656093e-05, "loss": 1.4382, "step": 2734 }, { "epoch": 0.40213196103657417, "grad_norm": 0.6856223344802856, "learning_rate": 1.7762938230383973e-05, "loss": 1.1625, "step": 2735 }, { "epoch": 0.4022789928322, "grad_norm": 0.5144992470741272, "learning_rate": 1.7696160267111852e-05, "loss": 1.1235, "step": 2736 }, { "epoch": 0.4024260246278258, "grad_norm": 0.47391530871391296, "learning_rate": 1.7629382303839732e-05, "loss": 1.3963, "step": 2737 }, { "epoch": 0.4025730564234516, "grad_norm": 0.5011457204818726, "learning_rate": 1.7562604340567612e-05, "loss": 1.0244, "step": 2738 }, { "epoch": 0.4027200882190774, "grad_norm": 0.5550037026405334, "learning_rate": 1.7495826377295492e-05, "loss": 1.0721, "step": 2739 }, { "epoch": 0.4028671200147032, "grad_norm": 0.6020316481590271, "learning_rate": 1.7429048414023372e-05, "loss": 1.0052, "step": 2740 }, { "epoch": 0.40301415181032896, "grad_norm": 0.586837112903595, "learning_rate": 1.7362270450751252e-05, "loss": 1.151, "step": 2741 }, { "epoch": 0.40316118360595476, "grad_norm": 0.5786679983139038, "learning_rate": 1.7295492487479135e-05, "loss": 1.1324, "step": 2742 }, { "epoch": 0.40330821540158057, "grad_norm": 0.5363876819610596, "learning_rate": 1.7228714524207012e-05, "loss": 1.2566, "step": 2743 }, { "epoch": 0.4034552471972064, "grad_norm": 0.8747673630714417, "learning_rate": 1.7161936560934895e-05, "loss": 1.0337, "step": 2744 }, { "epoch": 0.4036022789928322, "grad_norm": 0.5169288516044617, "learning_rate": 1.7095158597662772e-05, "loss": 1.1378, "step": 2745 }, { "epoch": 0.403749310788458, "grad_norm": 0.5257635116577148, "learning_rate": 1.7028380634390652e-05, "loss": 1.1553, "step": 2746 }, { "epoch": 0.4038963425840838, "grad_norm": 0.4877997040748596, "learning_rate": 1.696160267111853e-05, "loss": 1.1682, "step": 2747 }, { "epoch": 0.4040433743797096, "grad_norm": 0.45819708704948425, "learning_rate": 1.689482470784641e-05, "loss": 1.2989, "step": 2748 }, { "epoch": 0.4041904061753354, "grad_norm": 0.5912356376647949, "learning_rate": 1.682804674457429e-05, "loss": 1.0105, "step": 2749 }, { "epoch": 0.4043374379709612, "grad_norm": 0.5346068143844604, "learning_rate": 1.676126878130217e-05, "loss": 1.1013, "step": 2750 }, { "epoch": 0.40448446976658703, "grad_norm": 0.6395894885063171, "learning_rate": 1.669449081803005e-05, "loss": 0.9204, "step": 2751 }, { "epoch": 0.40463150156221284, "grad_norm": 0.44383224844932556, "learning_rate": 1.662771285475793e-05, "loss": 1.0929, "step": 2752 }, { "epoch": 0.40477853335783864, "grad_norm": 0.4411461055278778, "learning_rate": 1.656093489148581e-05, "loss": 1.4224, "step": 2753 }, { "epoch": 0.40492556515346445, "grad_norm": 0.6798364520072937, "learning_rate": 1.649415692821369e-05, "loss": 1.378, "step": 2754 }, { "epoch": 0.40507259694909026, "grad_norm": 0.5776209235191345, "learning_rate": 1.642737896494157e-05, "loss": 0.9301, "step": 2755 }, { "epoch": 0.40521962874471606, "grad_norm": 0.7265199422836304, "learning_rate": 1.636060100166945e-05, "loss": 1.2501, "step": 2756 }, { "epoch": 0.40536666054034187, "grad_norm": 0.6869149804115295, "learning_rate": 1.629382303839733e-05, "loss": 0.9784, "step": 2757 }, { "epoch": 0.4055136923359677, "grad_norm": 0.7459145188331604, "learning_rate": 1.622704507512521e-05, "loss": 1.0165, "step": 2758 }, { "epoch": 0.40566072413159343, "grad_norm": 0.6545610427856445, "learning_rate": 1.6160267111853087e-05, "loss": 0.8227, "step": 2759 }, { "epoch": 0.40580775592721924, "grad_norm": 0.6906120181083679, "learning_rate": 1.609348914858097e-05, "loss": 1.0314, "step": 2760 }, { "epoch": 0.40595478772284505, "grad_norm": 0.6780456900596619, "learning_rate": 1.6026711185308847e-05, "loss": 1.1941, "step": 2761 }, { "epoch": 0.40610181951847085, "grad_norm": 0.43283557891845703, "learning_rate": 1.595993322203673e-05, "loss": 1.0075, "step": 2762 }, { "epoch": 0.40624885131409666, "grad_norm": 0.4161304831504822, "learning_rate": 1.5893155258764607e-05, "loss": 1.3658, "step": 2763 }, { "epoch": 0.40639588310972247, "grad_norm": 0.5931475758552551, "learning_rate": 1.582637729549249e-05, "loss": 1.3217, "step": 2764 }, { "epoch": 0.4065429149053483, "grad_norm": 0.649986207485199, "learning_rate": 1.5759599332220367e-05, "loss": 1.4323, "step": 2765 }, { "epoch": 0.4066899467009741, "grad_norm": 0.6392269730567932, "learning_rate": 1.569282136894825e-05, "loss": 1.2114, "step": 2766 }, { "epoch": 0.4068369784965999, "grad_norm": 0.5256012082099915, "learning_rate": 1.5626043405676127e-05, "loss": 1.4226, "step": 2767 }, { "epoch": 0.4069840102922257, "grad_norm": 0.8594573736190796, "learning_rate": 1.555926544240401e-05, "loss": 0.8528, "step": 2768 }, { "epoch": 0.4071310420878515, "grad_norm": 0.467526376247406, "learning_rate": 1.5492487479131887e-05, "loss": 1.2474, "step": 2769 }, { "epoch": 0.4072780738834773, "grad_norm": 0.5402677059173584, "learning_rate": 1.5425709515859767e-05, "loss": 1.1581, "step": 2770 }, { "epoch": 0.4074251056791031, "grad_norm": 0.6055418848991394, "learning_rate": 1.5358931552587647e-05, "loss": 1.245, "step": 2771 }, { "epoch": 0.4075721374747289, "grad_norm": 0.4349450170993805, "learning_rate": 1.5292153589315527e-05, "loss": 1.4208, "step": 2772 }, { "epoch": 0.40771916927035473, "grad_norm": 0.48851683735847473, "learning_rate": 1.5225375626043406e-05, "loss": 1.2193, "step": 2773 }, { "epoch": 0.40786620106598054, "grad_norm": 0.7697353363037109, "learning_rate": 1.5158597662771286e-05, "loss": 1.373, "step": 2774 }, { "epoch": 0.40801323286160635, "grad_norm": 0.5974040031433105, "learning_rate": 1.5091819699499166e-05, "loss": 1.1323, "step": 2775 }, { "epoch": 0.40816026465723215, "grad_norm": 0.4267256259918213, "learning_rate": 1.5025041736227046e-05, "loss": 1.5628, "step": 2776 }, { "epoch": 0.4083072964528579, "grad_norm": 0.46776053309440613, "learning_rate": 1.4958263772954924e-05, "loss": 1.1717, "step": 2777 }, { "epoch": 0.4084543282484837, "grad_norm": 0.4750649631023407, "learning_rate": 1.4891485809682806e-05, "loss": 1.6229, "step": 2778 }, { "epoch": 0.4086013600441095, "grad_norm": 0.4475225508213043, "learning_rate": 1.4824707846410684e-05, "loss": 1.2414, "step": 2779 }, { "epoch": 0.4087483918397353, "grad_norm": 0.561926543712616, "learning_rate": 1.4757929883138566e-05, "loss": 1.1541, "step": 2780 }, { "epoch": 0.40889542363536113, "grad_norm": 0.7548227310180664, "learning_rate": 1.4691151919866444e-05, "loss": 1.2544, "step": 2781 }, { "epoch": 0.40904245543098694, "grad_norm": 0.5744566321372986, "learning_rate": 1.4624373956594326e-05, "loss": 1.2338, "step": 2782 }, { "epoch": 0.40918948722661275, "grad_norm": 0.5210035443305969, "learning_rate": 1.4557595993322204e-05, "loss": 1.5056, "step": 2783 }, { "epoch": 0.40933651902223855, "grad_norm": 0.6017113924026489, "learning_rate": 1.4490818030050086e-05, "loss": 0.9753, "step": 2784 }, { "epoch": 0.40948355081786436, "grad_norm": 0.5180342197418213, "learning_rate": 1.4424040066777964e-05, "loss": 1.2736, "step": 2785 }, { "epoch": 0.40963058261349017, "grad_norm": 0.4665171504020691, "learning_rate": 1.4357262103505844e-05, "loss": 1.0066, "step": 2786 }, { "epoch": 0.409777614409116, "grad_norm": 0.4276520907878876, "learning_rate": 1.4290484140233724e-05, "loss": 1.6223, "step": 2787 }, { "epoch": 0.4099246462047418, "grad_norm": 0.6481426358222961, "learning_rate": 1.4223706176961604e-05, "loss": 0.9983, "step": 2788 }, { "epoch": 0.4100716780003676, "grad_norm": 0.466325968503952, "learning_rate": 1.4156928213689482e-05, "loss": 1.5908, "step": 2789 }, { "epoch": 0.4102187097959934, "grad_norm": 0.5459919571876526, "learning_rate": 1.4090150250417364e-05, "loss": 1.1191, "step": 2790 }, { "epoch": 0.4103657415916192, "grad_norm": 0.6080808639526367, "learning_rate": 1.4023372287145242e-05, "loss": 1.1343, "step": 2791 }, { "epoch": 0.410512773387245, "grad_norm": 0.6398499608039856, "learning_rate": 1.3956594323873123e-05, "loss": 1.051, "step": 2792 }, { "epoch": 0.4106598051828708, "grad_norm": 0.6501467227935791, "learning_rate": 1.3889816360601002e-05, "loss": 1.1132, "step": 2793 }, { "epoch": 0.4108068369784966, "grad_norm": 0.6673159003257751, "learning_rate": 1.3823038397328883e-05, "loss": 1.483, "step": 2794 }, { "epoch": 0.4109538687741224, "grad_norm": 0.4268486201763153, "learning_rate": 1.3756260434056762e-05, "loss": 1.1539, "step": 2795 }, { "epoch": 0.4111009005697482, "grad_norm": 0.4388333559036255, "learning_rate": 1.3689482470784643e-05, "loss": 1.74, "step": 2796 }, { "epoch": 0.411247932365374, "grad_norm": 0.590534508228302, "learning_rate": 1.3622704507512521e-05, "loss": 0.9337, "step": 2797 }, { "epoch": 0.4113949641609998, "grad_norm": 0.5055831670761108, "learning_rate": 1.3555926544240401e-05, "loss": 1.084, "step": 2798 }, { "epoch": 0.4115419959566256, "grad_norm": 0.386233925819397, "learning_rate": 1.3489148580968281e-05, "loss": 0.9835, "step": 2799 }, { "epoch": 0.4116890277522514, "grad_norm": 0.8051352500915527, "learning_rate": 1.3422370617696161e-05, "loss": 1.0127, "step": 2800 }, { "epoch": 0.4118360595478772, "grad_norm": 0.5791876316070557, "learning_rate": 1.335559265442404e-05, "loss": 0.8462, "step": 2801 }, { "epoch": 0.411983091343503, "grad_norm": 0.5889988541603088, "learning_rate": 1.3288814691151921e-05, "loss": 1.0424, "step": 2802 }, { "epoch": 0.41213012313912883, "grad_norm": 0.6549580693244934, "learning_rate": 1.32220367278798e-05, "loss": 0.9539, "step": 2803 }, { "epoch": 0.41227715493475464, "grad_norm": 0.5483449697494507, "learning_rate": 1.3155258764607681e-05, "loss": 1.1347, "step": 2804 }, { "epoch": 0.41242418673038045, "grad_norm": 0.5750336050987244, "learning_rate": 1.3088480801335559e-05, "loss": 1.2401, "step": 2805 }, { "epoch": 0.41257121852600626, "grad_norm": 0.5035088658332825, "learning_rate": 1.302170283806344e-05, "loss": 1.5903, "step": 2806 }, { "epoch": 0.41271825032163206, "grad_norm": 0.8480167388916016, "learning_rate": 1.2954924874791319e-05, "loss": 1.1923, "step": 2807 }, { "epoch": 0.41286528211725787, "grad_norm": 0.6199629306793213, "learning_rate": 1.28881469115192e-05, "loss": 0.8466, "step": 2808 }, { "epoch": 0.4130123139128837, "grad_norm": 0.6151009798049927, "learning_rate": 1.2821368948247079e-05, "loss": 1.0045, "step": 2809 }, { "epoch": 0.4131593457085095, "grad_norm": 0.6441823244094849, "learning_rate": 1.2754590984974959e-05, "loss": 1.3631, "step": 2810 }, { "epoch": 0.4133063775041353, "grad_norm": 0.6572909355163574, "learning_rate": 1.2687813021702839e-05, "loss": 0.8276, "step": 2811 }, { "epoch": 0.4134534092997611, "grad_norm": 0.545369029045105, "learning_rate": 1.2621035058430719e-05, "loss": 1.1532, "step": 2812 }, { "epoch": 0.41360044109538685, "grad_norm": 0.47440293431282043, "learning_rate": 1.2554257095158597e-05, "loss": 1.126, "step": 2813 }, { "epoch": 0.41374747289101266, "grad_norm": 0.49649617075920105, "learning_rate": 1.2487479131886479e-05, "loss": 0.8316, "step": 2814 }, { "epoch": 0.41389450468663846, "grad_norm": 0.5723249316215515, "learning_rate": 1.2420701168614358e-05, "loss": 1.2151, "step": 2815 }, { "epoch": 0.41404153648226427, "grad_norm": 0.4995276927947998, "learning_rate": 1.2353923205342238e-05, "loss": 1.0729, "step": 2816 }, { "epoch": 0.4141885682778901, "grad_norm": 0.5460335612297058, "learning_rate": 1.2287145242070118e-05, "loss": 1.1777, "step": 2817 }, { "epoch": 0.4143356000735159, "grad_norm": 0.45392119884490967, "learning_rate": 1.2220367278797998e-05, "loss": 1.3751, "step": 2818 }, { "epoch": 0.4144826318691417, "grad_norm": 0.5987601280212402, "learning_rate": 1.2153589315525878e-05, "loss": 1.0284, "step": 2819 }, { "epoch": 0.4146296636647675, "grad_norm": 0.5923938751220703, "learning_rate": 1.2086811352253758e-05, "loss": 1.1643, "step": 2820 }, { "epoch": 0.4147766954603933, "grad_norm": 0.4413999617099762, "learning_rate": 1.2020033388981636e-05, "loss": 1.5067, "step": 2821 }, { "epoch": 0.4149237272560191, "grad_norm": 0.6797595024108887, "learning_rate": 1.1953255425709516e-05, "loss": 0.9927, "step": 2822 }, { "epoch": 0.4150707590516449, "grad_norm": 0.6419900059700012, "learning_rate": 1.1886477462437396e-05, "loss": 1.4061, "step": 2823 }, { "epoch": 0.41521779084727073, "grad_norm": 0.5174911022186279, "learning_rate": 1.1819699499165276e-05, "loss": 1.2192, "step": 2824 }, { "epoch": 0.41536482264289654, "grad_norm": 0.4925398528575897, "learning_rate": 1.1752921535893156e-05, "loss": 1.6699, "step": 2825 }, { "epoch": 0.41551185443852234, "grad_norm": 0.5973262786865234, "learning_rate": 1.1686143572621036e-05, "loss": 0.9772, "step": 2826 }, { "epoch": 0.41565888623414815, "grad_norm": 0.45007947087287903, "learning_rate": 1.1619365609348916e-05, "loss": 0.8946, "step": 2827 }, { "epoch": 0.41580591802977396, "grad_norm": 0.484133243560791, "learning_rate": 1.1552587646076796e-05, "loss": 1.7339, "step": 2828 }, { "epoch": 0.41595294982539976, "grad_norm": 0.6650263071060181, "learning_rate": 1.1485809682804676e-05, "loss": 1.3795, "step": 2829 }, { "epoch": 0.41609998162102557, "grad_norm": 0.6364145278930664, "learning_rate": 1.1419031719532556e-05, "loss": 1.3509, "step": 2830 }, { "epoch": 0.4162470134166513, "grad_norm": 0.5995643138885498, "learning_rate": 1.1352253756260436e-05, "loss": 0.9731, "step": 2831 }, { "epoch": 0.41639404521227713, "grad_norm": 0.5012508034706116, "learning_rate": 1.1285475792988316e-05, "loss": 1.0535, "step": 2832 }, { "epoch": 0.41654107700790294, "grad_norm": 0.7457807064056396, "learning_rate": 1.1218697829716194e-05, "loss": 0.9925, "step": 2833 }, { "epoch": 0.41668810880352875, "grad_norm": 0.7422792315483093, "learning_rate": 1.1151919866444074e-05, "loss": 1.2807, "step": 2834 }, { "epoch": 0.41683514059915455, "grad_norm": 0.765460729598999, "learning_rate": 1.1085141903171954e-05, "loss": 1.1871, "step": 2835 }, { "epoch": 0.41698217239478036, "grad_norm": 0.6868448853492737, "learning_rate": 1.1018363939899834e-05, "loss": 0.9231, "step": 2836 }, { "epoch": 0.41712920419040617, "grad_norm": 0.6395427584648132, "learning_rate": 1.0951585976627714e-05, "loss": 1.157, "step": 2837 }, { "epoch": 0.417276235986032, "grad_norm": 0.5380874872207642, "learning_rate": 1.0884808013355593e-05, "loss": 1.6781, "step": 2838 }, { "epoch": 0.4174232677816578, "grad_norm": 0.6079760193824768, "learning_rate": 1.0818030050083473e-05, "loss": 0.9837, "step": 2839 }, { "epoch": 0.4175702995772836, "grad_norm": 0.7841283679008484, "learning_rate": 1.0751252086811353e-05, "loss": 1.2352, "step": 2840 }, { "epoch": 0.4177173313729094, "grad_norm": 0.6855496168136597, "learning_rate": 1.0684474123539233e-05, "loss": 0.8188, "step": 2841 }, { "epoch": 0.4178643631685352, "grad_norm": 0.6228190660476685, "learning_rate": 1.0617696160267113e-05, "loss": 1.0292, "step": 2842 }, { "epoch": 0.418011394964161, "grad_norm": 0.6482272744178772, "learning_rate": 1.0550918196994993e-05, "loss": 0.9068, "step": 2843 }, { "epoch": 0.4181584267597868, "grad_norm": 0.40743377804756165, "learning_rate": 1.0484140233722873e-05, "loss": 1.2651, "step": 2844 }, { "epoch": 0.4183054585554126, "grad_norm": 0.5265284180641174, "learning_rate": 1.0417362270450751e-05, "loss": 1.0059, "step": 2845 }, { "epoch": 0.41845249035103843, "grad_norm": 0.65744948387146, "learning_rate": 1.0350584307178631e-05, "loss": 1.0665, "step": 2846 }, { "epoch": 0.41859952214666424, "grad_norm": 0.480619877576828, "learning_rate": 1.0283806343906511e-05, "loss": 1.5103, "step": 2847 }, { "epoch": 0.41874655394229005, "grad_norm": 0.6947080492973328, "learning_rate": 1.0217028380634391e-05, "loss": 1.1619, "step": 2848 }, { "epoch": 0.4188935857379158, "grad_norm": 0.7651011943817139, "learning_rate": 1.0150250417362271e-05, "loss": 1.1076, "step": 2849 }, { "epoch": 0.4190406175335416, "grad_norm": 0.43801450729370117, "learning_rate": 1.0083472454090151e-05, "loss": 1.5465, "step": 2850 }, { "epoch": 0.4191876493291674, "grad_norm": 0.4728192090988159, "learning_rate": 1.001669449081803e-05, "loss": 1.1004, "step": 2851 }, { "epoch": 0.4193346811247932, "grad_norm": 0.5360897183418274, "learning_rate": 9.94991652754591e-06, "loss": 1.2582, "step": 2852 }, { "epoch": 0.419481712920419, "grad_norm": 0.4986843764781952, "learning_rate": 9.88313856427379e-06, "loss": 1.2034, "step": 2853 }, { "epoch": 0.41962874471604483, "grad_norm": 0.7464840412139893, "learning_rate": 9.81636060100167e-06, "loss": 1.1764, "step": 2854 }, { "epoch": 0.41977577651167064, "grad_norm": 0.6222577691078186, "learning_rate": 9.74958263772955e-06, "loss": 1.2247, "step": 2855 }, { "epoch": 0.41992280830729645, "grad_norm": 0.6130269169807434, "learning_rate": 9.68280467445743e-06, "loss": 1.2776, "step": 2856 }, { "epoch": 0.42006984010292225, "grad_norm": 0.6596737504005432, "learning_rate": 9.616026711185309e-06, "loss": 1.1267, "step": 2857 }, { "epoch": 0.42021687189854806, "grad_norm": 0.5947371125221252, "learning_rate": 9.549248747913189e-06, "loss": 1.1841, "step": 2858 }, { "epoch": 0.42036390369417387, "grad_norm": 0.6734382510185242, "learning_rate": 9.482470784641069e-06, "loss": 1.0973, "step": 2859 }, { "epoch": 0.4205109354897997, "grad_norm": 0.6168414950370789, "learning_rate": 9.415692821368949e-06, "loss": 1.3045, "step": 2860 }, { "epoch": 0.4206579672854255, "grad_norm": 0.664055585861206, "learning_rate": 9.348914858096828e-06, "loss": 1.1808, "step": 2861 }, { "epoch": 0.4208049990810513, "grad_norm": 0.4681093990802765, "learning_rate": 9.282136894824708e-06, "loss": 1.5375, "step": 2862 }, { "epoch": 0.4209520308766771, "grad_norm": 0.6716040968894958, "learning_rate": 9.215358931552588e-06, "loss": 1.0992, "step": 2863 }, { "epoch": 0.4210990626723029, "grad_norm": 0.6525077223777771, "learning_rate": 9.148580968280468e-06, "loss": 1.1275, "step": 2864 }, { "epoch": 0.4212460944679287, "grad_norm": 0.5994552373886108, "learning_rate": 9.081803005008348e-06, "loss": 1.3112, "step": 2865 }, { "epoch": 0.4213931262635545, "grad_norm": 0.6512040495872498, "learning_rate": 9.015025041736228e-06, "loss": 1.2114, "step": 2866 }, { "epoch": 0.4215401580591803, "grad_norm": 0.5004043579101562, "learning_rate": 8.948247078464108e-06, "loss": 1.2653, "step": 2867 }, { "epoch": 0.4216871898548061, "grad_norm": 0.5784372687339783, "learning_rate": 8.881469115191986e-06, "loss": 1.0712, "step": 2868 }, { "epoch": 0.4218342216504319, "grad_norm": 0.6421346664428711, "learning_rate": 8.814691151919866e-06, "loss": 1.0183, "step": 2869 }, { "epoch": 0.4219812534460577, "grad_norm": 0.4353748857975006, "learning_rate": 8.747913188647746e-06, "loss": 1.7222, "step": 2870 }, { "epoch": 0.4221282852416835, "grad_norm": 0.6469619870185852, "learning_rate": 8.681135225375626e-06, "loss": 1.4778, "step": 2871 }, { "epoch": 0.4222753170373093, "grad_norm": 0.6887294054031372, "learning_rate": 8.614357262103506e-06, "loss": 1.2261, "step": 2872 }, { "epoch": 0.4224223488329351, "grad_norm": 0.6883739233016968, "learning_rate": 8.547579298831386e-06, "loss": 1.2581, "step": 2873 }, { "epoch": 0.4225693806285609, "grad_norm": 0.6667659282684326, "learning_rate": 8.480801335559266e-06, "loss": 1.0899, "step": 2874 }, { "epoch": 0.4227164124241867, "grad_norm": 0.7326667904853821, "learning_rate": 8.414023372287146e-06, "loss": 1.0998, "step": 2875 }, { "epoch": 0.42286344421981253, "grad_norm": 0.4216342568397522, "learning_rate": 8.347245409015026e-06, "loss": 1.4911, "step": 2876 }, { "epoch": 0.42301047601543834, "grad_norm": 0.6450990438461304, "learning_rate": 8.280467445742906e-06, "loss": 1.022, "step": 2877 }, { "epoch": 0.42315750781106415, "grad_norm": 0.476899117231369, "learning_rate": 8.213689482470786e-06, "loss": 1.5409, "step": 2878 }, { "epoch": 0.42330453960668996, "grad_norm": 0.5528339147567749, "learning_rate": 8.146911519198665e-06, "loss": 1.1676, "step": 2879 }, { "epoch": 0.42345157140231576, "grad_norm": 0.6216089725494385, "learning_rate": 8.080133555926544e-06, "loss": 0.9121, "step": 2880 }, { "epoch": 0.42359860319794157, "grad_norm": 0.5689449310302734, "learning_rate": 8.013355592654424e-06, "loss": 1.2996, "step": 2881 }, { "epoch": 0.4237456349935674, "grad_norm": 0.42409664392471313, "learning_rate": 7.946577629382304e-06, "loss": 1.1287, "step": 2882 }, { "epoch": 0.4238926667891932, "grad_norm": 0.6241899728775024, "learning_rate": 7.879799666110184e-06, "loss": 1.5079, "step": 2883 }, { "epoch": 0.424039698584819, "grad_norm": 0.5232539772987366, "learning_rate": 7.813021702838063e-06, "loss": 1.4586, "step": 2884 }, { "epoch": 0.4241867303804448, "grad_norm": 0.7522298693656921, "learning_rate": 7.746243739565943e-06, "loss": 1.3063, "step": 2885 }, { "epoch": 0.42433376217607055, "grad_norm": 0.4429248869419098, "learning_rate": 7.679465776293823e-06, "loss": 1.6093, "step": 2886 }, { "epoch": 0.42448079397169636, "grad_norm": 0.590691089630127, "learning_rate": 7.612687813021703e-06, "loss": 1.1047, "step": 2887 }, { "epoch": 0.42462782576732216, "grad_norm": 0.591015100479126, "learning_rate": 7.545909849749583e-06, "loss": 1.2845, "step": 2888 }, { "epoch": 0.42477485756294797, "grad_norm": 0.77299565076828, "learning_rate": 7.479131886477462e-06, "loss": 1.0084, "step": 2889 }, { "epoch": 0.4249218893585738, "grad_norm": 0.5368993878364563, "learning_rate": 7.412353923205342e-06, "loss": 1.1644, "step": 2890 }, { "epoch": 0.4250689211541996, "grad_norm": 0.6597957611083984, "learning_rate": 7.345575959933222e-06, "loss": 0.868, "step": 2891 }, { "epoch": 0.4252159529498254, "grad_norm": 0.6459593176841736, "learning_rate": 7.278797996661102e-06, "loss": 0.9736, "step": 2892 }, { "epoch": 0.4253629847454512, "grad_norm": 0.5801615118980408, "learning_rate": 7.212020033388982e-06, "loss": 1.2734, "step": 2893 }, { "epoch": 0.425510016541077, "grad_norm": 0.5478801727294922, "learning_rate": 7.145242070116862e-06, "loss": 1.0734, "step": 2894 }, { "epoch": 0.4256570483367028, "grad_norm": 0.5716439485549927, "learning_rate": 7.078464106844741e-06, "loss": 1.0352, "step": 2895 }, { "epoch": 0.4258040801323286, "grad_norm": 0.5927345156669617, "learning_rate": 7.011686143572621e-06, "loss": 1.1948, "step": 2896 }, { "epoch": 0.42595111192795443, "grad_norm": 0.4756138324737549, "learning_rate": 6.944908180300501e-06, "loss": 1.4883, "step": 2897 }, { "epoch": 0.42609814372358024, "grad_norm": 0.3572428524494171, "learning_rate": 6.878130217028381e-06, "loss": 1.4724, "step": 2898 }, { "epoch": 0.42624517551920604, "grad_norm": 0.6112091541290283, "learning_rate": 6.811352253756261e-06, "loss": 0.9599, "step": 2899 }, { "epoch": 0.42639220731483185, "grad_norm": 0.7001116871833801, "learning_rate": 6.744574290484141e-06, "loss": 0.9395, "step": 2900 }, { "epoch": 0.42653923911045766, "grad_norm": 0.804387092590332, "learning_rate": 6.67779632721202e-06, "loss": 1.6236, "step": 2901 }, { "epoch": 0.42668627090608346, "grad_norm": 0.44176191091537476, "learning_rate": 6.6110183639399e-06, "loss": 1.0276, "step": 2902 }, { "epoch": 0.42683330270170927, "grad_norm": 0.5642232298851013, "learning_rate": 6.5442404006677796e-06, "loss": 1.1512, "step": 2903 }, { "epoch": 0.426980334497335, "grad_norm": 0.7091665863990784, "learning_rate": 6.4774624373956595e-06, "loss": 1.0842, "step": 2904 }, { "epoch": 0.42712736629296083, "grad_norm": 0.5194580554962158, "learning_rate": 6.4106844741235394e-06, "loss": 1.1038, "step": 2905 }, { "epoch": 0.42727439808858664, "grad_norm": 0.8388220071792603, "learning_rate": 6.343906510851419e-06, "loss": 1.1956, "step": 2906 }, { "epoch": 0.42742142988421244, "grad_norm": 0.8439841270446777, "learning_rate": 6.2771285475792984e-06, "loss": 0.9547, "step": 2907 }, { "epoch": 0.42756846167983825, "grad_norm": 0.5508288741111755, "learning_rate": 6.210350584307179e-06, "loss": 1.2691, "step": 2908 }, { "epoch": 0.42771549347546406, "grad_norm": 0.6738397479057312, "learning_rate": 6.143572621035059e-06, "loss": 1.0449, "step": 2909 }, { "epoch": 0.42786252527108987, "grad_norm": 0.5618143677711487, "learning_rate": 6.076794657762939e-06, "loss": 1.08, "step": 2910 }, { "epoch": 0.4280095570667157, "grad_norm": 0.6277570128440857, "learning_rate": 6.010016694490818e-06, "loss": 1.3122, "step": 2911 }, { "epoch": 0.4281565888623415, "grad_norm": 0.7393084764480591, "learning_rate": 5.943238731218698e-06, "loss": 1.0351, "step": 2912 }, { "epoch": 0.4283036206579673, "grad_norm": 0.6744592189788818, "learning_rate": 5.876460767946578e-06, "loss": 0.9756, "step": 2913 }, { "epoch": 0.4284506524535931, "grad_norm": 0.45362111926078796, "learning_rate": 5.809682804674458e-06, "loss": 1.2685, "step": 2914 }, { "epoch": 0.4285976842492189, "grad_norm": 0.5814924240112305, "learning_rate": 5.742904841402338e-06, "loss": 1.1748, "step": 2915 }, { "epoch": 0.4287447160448447, "grad_norm": 0.6471604704856873, "learning_rate": 5.676126878130218e-06, "loss": 1.3792, "step": 2916 }, { "epoch": 0.4288917478404705, "grad_norm": 0.4589155912399292, "learning_rate": 5.609348914858097e-06, "loss": 1.2364, "step": 2917 }, { "epoch": 0.4290387796360963, "grad_norm": 0.7562245726585388, "learning_rate": 5.542570951585977e-06, "loss": 0.8924, "step": 2918 }, { "epoch": 0.42918581143172213, "grad_norm": 0.46402707695961, "learning_rate": 5.475792988313857e-06, "loss": 1.3089, "step": 2919 }, { "epoch": 0.42933284322734794, "grad_norm": 0.48288366198539734, "learning_rate": 5.409015025041737e-06, "loss": 1.0613, "step": 2920 }, { "epoch": 0.42947987502297374, "grad_norm": 0.6452842950820923, "learning_rate": 5.342237061769617e-06, "loss": 1.0657, "step": 2921 }, { "epoch": 0.4296269068185995, "grad_norm": 0.5654491186141968, "learning_rate": 5.2754590984974965e-06, "loss": 0.8277, "step": 2922 }, { "epoch": 0.4297739386142253, "grad_norm": 0.6268603801727295, "learning_rate": 5.208681135225376e-06, "loss": 1.3188, "step": 2923 }, { "epoch": 0.4299209704098511, "grad_norm": 0.823740541934967, "learning_rate": 5.1419031719532556e-06, "loss": 0.8505, "step": 2924 }, { "epoch": 0.4300680022054769, "grad_norm": 0.7386543154716492, "learning_rate": 5.0751252086811355e-06, "loss": 1.2113, "step": 2925 }, { "epoch": 0.4302150340011027, "grad_norm": 0.5859724283218384, "learning_rate": 5.008347245409015e-06, "loss": 1.3551, "step": 2926 }, { "epoch": 0.43036206579672853, "grad_norm": 0.4175781011581421, "learning_rate": 4.941569282136895e-06, "loss": 1.4551, "step": 2927 }, { "epoch": 0.43050909759235434, "grad_norm": 0.7267006635665894, "learning_rate": 4.874791318864775e-06, "loss": 0.8221, "step": 2928 }, { "epoch": 0.43065612938798015, "grad_norm": 0.7819854617118835, "learning_rate": 4.808013355592654e-06, "loss": 1.2319, "step": 2929 }, { "epoch": 0.43080316118360595, "grad_norm": 0.579531192779541, "learning_rate": 4.741235392320534e-06, "loss": 1.0594, "step": 2930 }, { "epoch": 0.43095019297923176, "grad_norm": 0.5639491081237793, "learning_rate": 4.674457429048414e-06, "loss": 1.1395, "step": 2931 }, { "epoch": 0.43109722477485757, "grad_norm": 0.5892883539199829, "learning_rate": 4.607679465776294e-06, "loss": 1.1579, "step": 2932 }, { "epoch": 0.4312442565704834, "grad_norm": 0.5180469751358032, "learning_rate": 4.540901502504174e-06, "loss": 0.9862, "step": 2933 }, { "epoch": 0.4313912883661092, "grad_norm": 0.6282188892364502, "learning_rate": 4.474123539232054e-06, "loss": 1.2579, "step": 2934 }, { "epoch": 0.431538320161735, "grad_norm": 0.5224735736846924, "learning_rate": 4.407345575959933e-06, "loss": 1.2348, "step": 2935 }, { "epoch": 0.4316853519573608, "grad_norm": 0.5774785876274109, "learning_rate": 4.340567612687813e-06, "loss": 1.2779, "step": 2936 }, { "epoch": 0.4318323837529866, "grad_norm": 0.5390040874481201, "learning_rate": 4.273789649415693e-06, "loss": 1.1241, "step": 2937 }, { "epoch": 0.4319794155486124, "grad_norm": 0.6080900430679321, "learning_rate": 4.207011686143573e-06, "loss": 1.4262, "step": 2938 }, { "epoch": 0.4321264473442382, "grad_norm": 0.5194503664970398, "learning_rate": 4.140233722871453e-06, "loss": 1.1946, "step": 2939 }, { "epoch": 0.43227347913986397, "grad_norm": 0.6361677646636963, "learning_rate": 4.073455759599333e-06, "loss": 1.0618, "step": 2940 }, { "epoch": 0.4324205109354898, "grad_norm": 0.5843046307563782, "learning_rate": 4.006677796327212e-06, "loss": 1.1715, "step": 2941 }, { "epoch": 0.4325675427311156, "grad_norm": 0.7564122080802917, "learning_rate": 3.939899833055092e-06, "loss": 0.8586, "step": 2942 }, { "epoch": 0.4327145745267414, "grad_norm": 0.4432048797607422, "learning_rate": 3.873121869782972e-06, "loss": 1.1823, "step": 2943 }, { "epoch": 0.4328616063223672, "grad_norm": 0.58137446641922, "learning_rate": 3.8063439065108516e-06, "loss": 1.6214, "step": 2944 }, { "epoch": 0.433008638117993, "grad_norm": 0.6347450017929077, "learning_rate": 3.739565943238731e-06, "loss": 1.0245, "step": 2945 }, { "epoch": 0.4331556699136188, "grad_norm": 0.6903138756752014, "learning_rate": 3.672787979966611e-06, "loss": 1.1068, "step": 2946 }, { "epoch": 0.4333027017092446, "grad_norm": 0.5954134464263916, "learning_rate": 3.606010016694491e-06, "loss": 1.4749, "step": 2947 }, { "epoch": 0.4334497335048704, "grad_norm": 0.5564675331115723, "learning_rate": 3.5392320534223705e-06, "loss": 1.3033, "step": 2948 }, { "epoch": 0.43359676530049623, "grad_norm": 0.447492390871048, "learning_rate": 3.4724540901502504e-06, "loss": 1.1702, "step": 2949 }, { "epoch": 0.43374379709612204, "grad_norm": 0.47529488801956177, "learning_rate": 3.4056761268781303e-06, "loss": 1.1875, "step": 2950 }, { "epoch": 0.43389082889174785, "grad_norm": 0.5913407206535339, "learning_rate": 3.33889816360601e-06, "loss": 1.2712, "step": 2951 }, { "epoch": 0.43403786068737366, "grad_norm": 0.5256885290145874, "learning_rate": 3.2721202003338898e-06, "loss": 1.4034, "step": 2952 }, { "epoch": 0.43418489248299946, "grad_norm": 0.5218196511268616, "learning_rate": 3.2053422370617697e-06, "loss": 0.9917, "step": 2953 }, { "epoch": 0.43433192427862527, "grad_norm": 0.44108885526657104, "learning_rate": 3.1385642737896492e-06, "loss": 1.1847, "step": 2954 }, { "epoch": 0.4344789560742511, "grad_norm": 0.5063046813011169, "learning_rate": 3.0717863105175296e-06, "loss": 1.0793, "step": 2955 }, { "epoch": 0.4346259878698769, "grad_norm": 0.8227088451385498, "learning_rate": 3.005008347245409e-06, "loss": 1.4886, "step": 2956 }, { "epoch": 0.4347730196655027, "grad_norm": 0.4426083564758301, "learning_rate": 2.938230383973289e-06, "loss": 1.1735, "step": 2957 }, { "epoch": 0.43492005146112844, "grad_norm": 0.5662470459938049, "learning_rate": 2.871452420701169e-06, "loss": 1.1582, "step": 2958 }, { "epoch": 0.43506708325675425, "grad_norm": 0.5607680678367615, "learning_rate": 2.8046744574290484e-06, "loss": 0.8589, "step": 2959 }, { "epoch": 0.43521411505238006, "grad_norm": 0.6519868969917297, "learning_rate": 2.7378964941569284e-06, "loss": 1.0341, "step": 2960 }, { "epoch": 0.43536114684800586, "grad_norm": 0.4525550603866577, "learning_rate": 2.6711185308848083e-06, "loss": 1.8341, "step": 2961 }, { "epoch": 0.43550817864363167, "grad_norm": 0.5136383175849915, "learning_rate": 2.604340567612688e-06, "loss": 1.2018, "step": 2962 }, { "epoch": 0.4356552104392575, "grad_norm": 0.4787796139717102, "learning_rate": 2.5375626043405677e-06, "loss": 1.5627, "step": 2963 }, { "epoch": 0.4358022422348833, "grad_norm": 0.5559461712837219, "learning_rate": 2.4707846410684477e-06, "loss": 1.3876, "step": 2964 }, { "epoch": 0.4359492740305091, "grad_norm": 0.6563612222671509, "learning_rate": 2.404006677796327e-06, "loss": 0.9667, "step": 2965 }, { "epoch": 0.4360963058261349, "grad_norm": 0.6237492561340332, "learning_rate": 2.337228714524207e-06, "loss": 1.1258, "step": 2966 }, { "epoch": 0.4362433376217607, "grad_norm": 0.5238305330276489, "learning_rate": 2.270450751252087e-06, "loss": 1.2919, "step": 2967 }, { "epoch": 0.4363903694173865, "grad_norm": 0.7477308511734009, "learning_rate": 2.2036727879799665e-06, "loss": 1.3459, "step": 2968 }, { "epoch": 0.4365374012130123, "grad_norm": 0.44818630814552307, "learning_rate": 2.1368948247078465e-06, "loss": 0.9674, "step": 2969 }, { "epoch": 0.43668443300863813, "grad_norm": 0.5786495804786682, "learning_rate": 2.0701168614357264e-06, "loss": 1.125, "step": 2970 }, { "epoch": 0.43683146480426394, "grad_norm": 0.599043607711792, "learning_rate": 2.003338898163606e-06, "loss": 1.373, "step": 2971 }, { "epoch": 0.43697849659988974, "grad_norm": 0.648072361946106, "learning_rate": 1.936560934891486e-06, "loss": 1.0064, "step": 2972 }, { "epoch": 0.43712552839551555, "grad_norm": 0.7216872572898865, "learning_rate": 1.8697829716193656e-06, "loss": 0.9303, "step": 2973 }, { "epoch": 0.43727256019114136, "grad_norm": 0.6196619272232056, "learning_rate": 1.8030050083472455e-06, "loss": 1.1447, "step": 2974 }, { "epoch": 0.43741959198676716, "grad_norm": 0.5114753246307373, "learning_rate": 1.7362270450751252e-06, "loss": 1.4639, "step": 2975 }, { "epoch": 0.4375666237823929, "grad_norm": 0.789514422416687, "learning_rate": 1.669449081803005e-06, "loss": 1.222, "step": 2976 }, { "epoch": 0.4377136555780187, "grad_norm": 0.4615687429904938, "learning_rate": 1.6026711185308849e-06, "loss": 1.6559, "step": 2977 }, { "epoch": 0.43786068737364453, "grad_norm": 0.5584313869476318, "learning_rate": 1.5358931552587648e-06, "loss": 1.3168, "step": 2978 }, { "epoch": 0.43800771916927034, "grad_norm": 0.4953570067882538, "learning_rate": 1.4691151919866445e-06, "loss": 1.4382, "step": 2979 }, { "epoch": 0.43815475096489614, "grad_norm": 0.8009016513824463, "learning_rate": 1.4023372287145242e-06, "loss": 1.0538, "step": 2980 }, { "epoch": 0.43830178276052195, "grad_norm": 0.5448253750801086, "learning_rate": 1.3355592654424042e-06, "loss": 0.8472, "step": 2981 }, { "epoch": 0.43844881455614776, "grad_norm": 0.38562849164009094, "learning_rate": 1.2687813021702839e-06, "loss": 1.4624, "step": 2982 }, { "epoch": 0.43859584635177357, "grad_norm": 0.7461350560188293, "learning_rate": 1.2020033388981636e-06, "loss": 1.0105, "step": 2983 }, { "epoch": 0.4387428781473994, "grad_norm": 0.5085606575012207, "learning_rate": 1.1352253756260435e-06, "loss": 1.191, "step": 2984 }, { "epoch": 0.4388899099430252, "grad_norm": 0.5681290030479431, "learning_rate": 1.0684474123539232e-06, "loss": 1.1996, "step": 2985 }, { "epoch": 0.439036941738651, "grad_norm": 0.4466317296028137, "learning_rate": 1.001669449081803e-06, "loss": 1.4206, "step": 2986 }, { "epoch": 0.4391839735342768, "grad_norm": 0.6732923984527588, "learning_rate": 9.348914858096828e-07, "loss": 1.3061, "step": 2987 }, { "epoch": 0.4393310053299026, "grad_norm": 0.8050938248634338, "learning_rate": 8.681135225375626e-07, "loss": 0.9057, "step": 2988 }, { "epoch": 0.4394780371255284, "grad_norm": 0.6391505002975464, "learning_rate": 8.013355592654424e-07, "loss": 1.0067, "step": 2989 }, { "epoch": 0.4396250689211542, "grad_norm": 0.6685643792152405, "learning_rate": 7.345575959933223e-07, "loss": 1.0776, "step": 2990 }, { "epoch": 0.43977210071678, "grad_norm": 0.6509221792221069, "learning_rate": 6.677796327212021e-07, "loss": 1.2433, "step": 2991 }, { "epoch": 0.43991913251240583, "grad_norm": 0.515468418598175, "learning_rate": 6.010016694490818e-07, "loss": 1.1377, "step": 2992 }, { "epoch": 0.44006616430803164, "grad_norm": 0.6986977458000183, "learning_rate": 5.342237061769616e-07, "loss": 0.9219, "step": 2993 }, { "epoch": 0.4402131961036574, "grad_norm": 0.51728355884552, "learning_rate": 4.674457429048414e-07, "loss": 1.4642, "step": 2994 }, { "epoch": 0.4403602278992832, "grad_norm": 0.38782957196235657, "learning_rate": 4.006677796327212e-07, "loss": 1.2189, "step": 2995 }, { "epoch": 0.440507259694909, "grad_norm": 0.5731973648071289, "learning_rate": 3.3388981636060104e-07, "loss": 1.1463, "step": 2996 }, { "epoch": 0.4406542914905348, "grad_norm": 0.46806904673576355, "learning_rate": 2.671118530884808e-07, "loss": 1.2181, "step": 2997 }, { "epoch": 0.4408013232861606, "grad_norm": 0.6697878241539001, "learning_rate": 2.003338898163606e-07, "loss": 1.4151, "step": 2998 }, { "epoch": 0.4409483550817864, "grad_norm": 0.37280604243278503, "learning_rate": 1.335559265442404e-07, "loss": 1.8518, "step": 2999 }, { "epoch": 0.44109538687741223, "grad_norm": 0.39049333333969116, "learning_rate": 6.67779632721202e-08, "loss": 1.474, "step": 3000 }, { "epoch": 0.44124241867303804, "grad_norm": 0.5715314745903015, "learning_rate": 0.0, "loss": 0.937, "step": 3001 }, { "epoch": 0.44138945046866385, "grad_norm": 0.5309827923774719, "learning_rate": 8.004004004004005e-05, "loss": 1.3005, "step": 3002 }, { "epoch": 0.44153648226428965, "grad_norm": 0.4774700999259949, "learning_rate": 8e-05, "loss": 1.4273, "step": 3003 }, { "epoch": 0.44168351405991546, "grad_norm": 0.41628900170326233, "learning_rate": 7.995995995995996e-05, "loss": 1.2931, "step": 3004 }, { "epoch": 0.44183054585554127, "grad_norm": 0.729850709438324, "learning_rate": 7.991991991991992e-05, "loss": 1.2846, "step": 3005 }, { "epoch": 0.4419775776511671, "grad_norm": 0.5727280974388123, "learning_rate": 7.987987987987988e-05, "loss": 1.0101, "step": 3006 }, { "epoch": 0.4421246094467929, "grad_norm": 0.5235925912857056, "learning_rate": 7.983983983983985e-05, "loss": 1.4118, "step": 3007 }, { "epoch": 0.4422716412424187, "grad_norm": 0.7491974234580994, "learning_rate": 7.979979979979981e-05, "loss": 0.9614, "step": 3008 }, { "epoch": 0.4424186730380445, "grad_norm": 0.47199922800064087, "learning_rate": 7.975975975975976e-05, "loss": 1.0418, "step": 3009 }, { "epoch": 0.4425657048336703, "grad_norm": 0.4672444462776184, "learning_rate": 7.971971971971972e-05, "loss": 1.2079, "step": 3010 }, { "epoch": 0.4427127366292961, "grad_norm": 0.4790749251842499, "learning_rate": 7.967967967967969e-05, "loss": 1.2103, "step": 3011 }, { "epoch": 0.44285976842492186, "grad_norm": 0.52845698595047, "learning_rate": 7.963963963963964e-05, "loss": 0.9237, "step": 3012 }, { "epoch": 0.44300680022054767, "grad_norm": 0.4317225515842438, "learning_rate": 7.959959959959961e-05, "loss": 1.2409, "step": 3013 }, { "epoch": 0.4431538320161735, "grad_norm": 0.5532808303833008, "learning_rate": 7.955955955955956e-05, "loss": 1.1717, "step": 3014 }, { "epoch": 0.4433008638117993, "grad_norm": 0.6352879405021667, "learning_rate": 7.951951951951952e-05, "loss": 1.3647, "step": 3015 }, { "epoch": 0.4434478956074251, "grad_norm": 0.5172725319862366, "learning_rate": 7.947947947947948e-05, "loss": 1.1391, "step": 3016 }, { "epoch": 0.4435949274030509, "grad_norm": 0.42478322982788086, "learning_rate": 7.943943943943945e-05, "loss": 1.0662, "step": 3017 }, { "epoch": 0.4437419591986767, "grad_norm": 0.5734497308731079, "learning_rate": 7.93993993993994e-05, "loss": 0.8885, "step": 3018 }, { "epoch": 0.4438889909943025, "grad_norm": 0.581440269947052, "learning_rate": 7.935935935935936e-05, "loss": 1.1088, "step": 3019 }, { "epoch": 0.4440360227899283, "grad_norm": 0.6368793249130249, "learning_rate": 7.931931931931932e-05, "loss": 1.0159, "step": 3020 }, { "epoch": 0.4441830545855541, "grad_norm": 0.6039066910743713, "learning_rate": 7.927927927927928e-05, "loss": 1.0121, "step": 3021 }, { "epoch": 0.44433008638117993, "grad_norm": 0.4598219096660614, "learning_rate": 7.923923923923924e-05, "loss": 1.1479, "step": 3022 }, { "epoch": 0.44447711817680574, "grad_norm": 0.7323593497276306, "learning_rate": 7.919919919919921e-05, "loss": 1.3472, "step": 3023 }, { "epoch": 0.44462414997243155, "grad_norm": 0.5465438365936279, "learning_rate": 7.915915915915915e-05, "loss": 1.2732, "step": 3024 }, { "epoch": 0.44477118176805736, "grad_norm": 0.5182192921638489, "learning_rate": 7.911911911911912e-05, "loss": 1.4627, "step": 3025 }, { "epoch": 0.44491821356368316, "grad_norm": 0.6892410516738892, "learning_rate": 7.90790790790791e-05, "loss": 1.0624, "step": 3026 }, { "epoch": 0.44506524535930897, "grad_norm": 0.6542565822601318, "learning_rate": 7.903903903903904e-05, "loss": 0.9358, "step": 3027 }, { "epoch": 0.4452122771549348, "grad_norm": 0.776448667049408, "learning_rate": 7.899899899899901e-05, "loss": 1.137, "step": 3028 }, { "epoch": 0.4453593089505606, "grad_norm": 0.6796862483024597, "learning_rate": 7.895895895895897e-05, "loss": 1.2234, "step": 3029 }, { "epoch": 0.44550634074618634, "grad_norm": 0.46503594517707825, "learning_rate": 7.891891891891892e-05, "loss": 1.553, "step": 3030 }, { "epoch": 0.44565337254181214, "grad_norm": 0.5702188014984131, "learning_rate": 7.887887887887888e-05, "loss": 1.3981, "step": 3031 }, { "epoch": 0.44580040433743795, "grad_norm": 0.7000522017478943, "learning_rate": 7.883883883883885e-05, "loss": 1.3634, "step": 3032 }, { "epoch": 0.44594743613306376, "grad_norm": 0.6187648177146912, "learning_rate": 7.87987987987988e-05, "loss": 1.1661, "step": 3033 }, { "epoch": 0.44609446792868956, "grad_norm": 0.46628549695014954, "learning_rate": 7.875875875875877e-05, "loss": 1.1603, "step": 3034 }, { "epoch": 0.44624149972431537, "grad_norm": 0.7719597816467285, "learning_rate": 7.871871871871872e-05, "loss": 0.9679, "step": 3035 }, { "epoch": 0.4463885315199412, "grad_norm": 0.5614684224128723, "learning_rate": 7.867867867867868e-05, "loss": 1.2294, "step": 3036 }, { "epoch": 0.446535563315567, "grad_norm": 0.5791134238243103, "learning_rate": 7.863863863863864e-05, "loss": 1.3328, "step": 3037 }, { "epoch": 0.4466825951111928, "grad_norm": 0.6847330331802368, "learning_rate": 7.859859859859861e-05, "loss": 1.0744, "step": 3038 }, { "epoch": 0.4468296269068186, "grad_norm": 0.4809703230857849, "learning_rate": 7.855855855855857e-05, "loss": 1.5145, "step": 3039 }, { "epoch": 0.4469766587024444, "grad_norm": 0.532860279083252, "learning_rate": 7.851851851851852e-05, "loss": 1.0526, "step": 3040 }, { "epoch": 0.4471236904980702, "grad_norm": 0.4584046006202698, "learning_rate": 7.847847847847848e-05, "loss": 0.8038, "step": 3041 }, { "epoch": 0.447270722293696, "grad_norm": 0.5781496167182922, "learning_rate": 7.843843843843844e-05, "loss": 0.9686, "step": 3042 }, { "epoch": 0.44741775408932183, "grad_norm": 0.6033342480659485, "learning_rate": 7.83983983983984e-05, "loss": 1.3918, "step": 3043 }, { "epoch": 0.44756478588494764, "grad_norm": 0.49098268151283264, "learning_rate": 7.835835835835837e-05, "loss": 1.4349, "step": 3044 }, { "epoch": 0.44771181768057344, "grad_norm": 0.6450899839401245, "learning_rate": 7.831831831831832e-05, "loss": 1.1867, "step": 3045 }, { "epoch": 0.44785884947619925, "grad_norm": 0.7272488474845886, "learning_rate": 7.827827827827828e-05, "loss": 1.0154, "step": 3046 }, { "epoch": 0.44800588127182506, "grad_norm": 0.4990086257457733, "learning_rate": 7.823823823823824e-05, "loss": 1.4096, "step": 3047 }, { "epoch": 0.4481529130674508, "grad_norm": 0.6434330344200134, "learning_rate": 7.819819819819821e-05, "loss": 0.9766, "step": 3048 }, { "epoch": 0.4482999448630766, "grad_norm": 0.592932939529419, "learning_rate": 7.815815815815815e-05, "loss": 1.1641, "step": 3049 }, { "epoch": 0.4484469766587024, "grad_norm": 0.7786630392074585, "learning_rate": 7.811811811811812e-05, "loss": 0.9239, "step": 3050 }, { "epoch": 0.44859400845432823, "grad_norm": 0.6199022531509399, "learning_rate": 7.807807807807808e-05, "loss": 1.0906, "step": 3051 }, { "epoch": 0.44874104024995404, "grad_norm": 0.6383281350135803, "learning_rate": 7.803803803803804e-05, "loss": 0.9988, "step": 3052 }, { "epoch": 0.44888807204557984, "grad_norm": 0.5807875394821167, "learning_rate": 7.799799799799801e-05, "loss": 1.6243, "step": 3053 }, { "epoch": 0.44903510384120565, "grad_norm": 0.7127140760421753, "learning_rate": 7.795795795795797e-05, "loss": 1.0156, "step": 3054 }, { "epoch": 0.44918213563683146, "grad_norm": 0.6495125889778137, "learning_rate": 7.791791791791792e-05, "loss": 0.9925, "step": 3055 }, { "epoch": 0.44932916743245727, "grad_norm": 0.7229060530662537, "learning_rate": 7.787787787787788e-05, "loss": 1.2746, "step": 3056 }, { "epoch": 0.4494761992280831, "grad_norm": 0.4695928990840912, "learning_rate": 7.783783783783785e-05, "loss": 1.5828, "step": 3057 }, { "epoch": 0.4496232310237089, "grad_norm": 0.6969085931777954, "learning_rate": 7.77977977977978e-05, "loss": 1.2474, "step": 3058 }, { "epoch": 0.4497702628193347, "grad_norm": 0.6731665730476379, "learning_rate": 7.775775775775777e-05, "loss": 0.9767, "step": 3059 }, { "epoch": 0.4499172946149605, "grad_norm": 0.5919747948646545, "learning_rate": 7.771771771771772e-05, "loss": 1.4402, "step": 3060 }, { "epoch": 0.4500643264105863, "grad_norm": 0.644991397857666, "learning_rate": 7.767767767767768e-05, "loss": 1.0291, "step": 3061 }, { "epoch": 0.4502113582062121, "grad_norm": 0.5303351283073425, "learning_rate": 7.763763763763764e-05, "loss": 1.0363, "step": 3062 }, { "epoch": 0.4503583900018379, "grad_norm": 0.7129048705101013, "learning_rate": 7.759759759759761e-05, "loss": 0.9641, "step": 3063 }, { "epoch": 0.4505054217974637, "grad_norm": 0.5903252959251404, "learning_rate": 7.755755755755755e-05, "loss": 1.2438, "step": 3064 }, { "epoch": 0.45065245359308953, "grad_norm": 0.41784965991973877, "learning_rate": 7.751751751751752e-05, "loss": 0.7716, "step": 3065 }, { "epoch": 0.4507994853887153, "grad_norm": 0.6161693930625916, "learning_rate": 7.747747747747748e-05, "loss": 0.7939, "step": 3066 }, { "epoch": 0.4509465171843411, "grad_norm": 0.5145079493522644, "learning_rate": 7.743743743743744e-05, "loss": 1.2065, "step": 3067 }, { "epoch": 0.4510935489799669, "grad_norm": 0.6847787499427795, "learning_rate": 7.73973973973974e-05, "loss": 0.9696, "step": 3068 }, { "epoch": 0.4512405807755927, "grad_norm": 0.5431666374206543, "learning_rate": 7.735735735735737e-05, "loss": 1.2866, "step": 3069 }, { "epoch": 0.4513876125712185, "grad_norm": 0.5589327812194824, "learning_rate": 7.731731731731731e-05, "loss": 1.539, "step": 3070 }, { "epoch": 0.4515346443668443, "grad_norm": 0.551093339920044, "learning_rate": 7.727727727727728e-05, "loss": 1.3534, "step": 3071 }, { "epoch": 0.4516816761624701, "grad_norm": 0.6674676537513733, "learning_rate": 7.723723723723724e-05, "loss": 1.1153, "step": 3072 }, { "epoch": 0.45182870795809593, "grad_norm": 0.3867219388484955, "learning_rate": 7.71971971971972e-05, "loss": 1.4667, "step": 3073 }, { "epoch": 0.45197573975372174, "grad_norm": 0.6534422039985657, "learning_rate": 7.715715715715715e-05, "loss": 1.4959, "step": 3074 }, { "epoch": 0.45212277154934755, "grad_norm": 0.7592636942863464, "learning_rate": 7.711711711711713e-05, "loss": 1.0485, "step": 3075 }, { "epoch": 0.45226980334497335, "grad_norm": 0.7680928111076355, "learning_rate": 7.707707707707707e-05, "loss": 1.0106, "step": 3076 }, { "epoch": 0.45241683514059916, "grad_norm": 0.5526341199874878, "learning_rate": 7.703703703703704e-05, "loss": 1.3832, "step": 3077 }, { "epoch": 0.45256386693622497, "grad_norm": 0.5713878870010376, "learning_rate": 7.699699699699701e-05, "loss": 1.2133, "step": 3078 }, { "epoch": 0.4527108987318508, "grad_norm": 0.7178694605827332, "learning_rate": 7.695695695695695e-05, "loss": 1.0356, "step": 3079 }, { "epoch": 0.4528579305274766, "grad_norm": 0.5467953085899353, "learning_rate": 7.691691691691693e-05, "loss": 0.9599, "step": 3080 }, { "epoch": 0.4530049623231024, "grad_norm": 0.47246530652046204, "learning_rate": 7.687687687687688e-05, "loss": 1.1835, "step": 3081 }, { "epoch": 0.4531519941187282, "grad_norm": 0.6141449213027954, "learning_rate": 7.683683683683684e-05, "loss": 1.1094, "step": 3082 }, { "epoch": 0.453299025914354, "grad_norm": 0.5056164860725403, "learning_rate": 7.67967967967968e-05, "loss": 1.1077, "step": 3083 }, { "epoch": 0.4534460577099798, "grad_norm": 0.457361102104187, "learning_rate": 7.675675675675677e-05, "loss": 1.1581, "step": 3084 }, { "epoch": 0.45359308950560556, "grad_norm": 0.5992422103881836, "learning_rate": 7.671671671671671e-05, "loss": 1.2226, "step": 3085 }, { "epoch": 0.45374012130123137, "grad_norm": 0.3869837522506714, "learning_rate": 7.667667667667668e-05, "loss": 1.4855, "step": 3086 }, { "epoch": 0.4538871530968572, "grad_norm": 0.6191926002502441, "learning_rate": 7.663663663663664e-05, "loss": 0.9115, "step": 3087 }, { "epoch": 0.454034184892483, "grad_norm": 0.5228755474090576, "learning_rate": 7.65965965965966e-05, "loss": 1.097, "step": 3088 }, { "epoch": 0.4541812166881088, "grad_norm": 0.5761334896087646, "learning_rate": 7.655655655655656e-05, "loss": 1.215, "step": 3089 }, { "epoch": 0.4543282484837346, "grad_norm": 0.7652355432510376, "learning_rate": 7.651651651651653e-05, "loss": 0.9362, "step": 3090 }, { "epoch": 0.4544752802793604, "grad_norm": 0.605821430683136, "learning_rate": 7.647647647647648e-05, "loss": 1.1049, "step": 3091 }, { "epoch": 0.4546223120749862, "grad_norm": 0.5556384325027466, "learning_rate": 7.643643643643644e-05, "loss": 1.3121, "step": 3092 }, { "epoch": 0.454769343870612, "grad_norm": 0.5746684670448303, "learning_rate": 7.63963963963964e-05, "loss": 1.4415, "step": 3093 }, { "epoch": 0.4549163756662378, "grad_norm": 0.5413640141487122, "learning_rate": 7.635635635635637e-05, "loss": 1.3174, "step": 3094 }, { "epoch": 0.45506340746186363, "grad_norm": 0.6226150393486023, "learning_rate": 7.631631631631631e-05, "loss": 1.1125, "step": 3095 }, { "epoch": 0.45521043925748944, "grad_norm": 0.5952287316322327, "learning_rate": 7.627627627627628e-05, "loss": 1.4399, "step": 3096 }, { "epoch": 0.45535747105311525, "grad_norm": 0.4275517463684082, "learning_rate": 7.623623623623624e-05, "loss": 1.5941, "step": 3097 }, { "epoch": 0.45550450284874106, "grad_norm": 0.49997279047966003, "learning_rate": 7.61961961961962e-05, "loss": 0.9676, "step": 3098 }, { "epoch": 0.45565153464436686, "grad_norm": 0.5103732943534851, "learning_rate": 7.615615615615616e-05, "loss": 1.2189, "step": 3099 }, { "epoch": 0.45579856643999267, "grad_norm": 0.5046006441116333, "learning_rate": 7.611611611611613e-05, "loss": 1.2451, "step": 3100 }, { "epoch": 0.4559455982356185, "grad_norm": 0.4566158652305603, "learning_rate": 7.607607607607607e-05, "loss": 1.2155, "step": 3101 }, { "epoch": 0.4560926300312443, "grad_norm": 0.7640852332115173, "learning_rate": 7.603603603603604e-05, "loss": 0.94, "step": 3102 }, { "epoch": 0.45623966182687004, "grad_norm": 0.4005560874938965, "learning_rate": 7.5995995995996e-05, "loss": 1.7951, "step": 3103 }, { "epoch": 0.45638669362249584, "grad_norm": 0.6121296882629395, "learning_rate": 7.595595595595596e-05, "loss": 1.0201, "step": 3104 }, { "epoch": 0.45653372541812165, "grad_norm": 0.5225343108177185, "learning_rate": 7.591591591591593e-05, "loss": 1.1171, "step": 3105 }, { "epoch": 0.45668075721374746, "grad_norm": 0.6010500192642212, "learning_rate": 7.587587587587588e-05, "loss": 1.2743, "step": 3106 }, { "epoch": 0.45682778900937326, "grad_norm": 0.361880898475647, "learning_rate": 7.583583583583584e-05, "loss": 1.5249, "step": 3107 }, { "epoch": 0.45697482080499907, "grad_norm": 0.7697463631629944, "learning_rate": 7.57957957957958e-05, "loss": 1.0913, "step": 3108 }, { "epoch": 0.4571218526006249, "grad_norm": 0.4920119643211365, "learning_rate": 7.575575575575577e-05, "loss": 1.529, "step": 3109 }, { "epoch": 0.4572688843962507, "grad_norm": 0.5560708045959473, "learning_rate": 7.571571571571571e-05, "loss": 1.1695, "step": 3110 }, { "epoch": 0.4574159161918765, "grad_norm": 0.7044244408607483, "learning_rate": 7.567567567567568e-05, "loss": 0.9013, "step": 3111 }, { "epoch": 0.4575629479875023, "grad_norm": 0.5346618294715881, "learning_rate": 7.563563563563564e-05, "loss": 1.2702, "step": 3112 }, { "epoch": 0.4577099797831281, "grad_norm": 0.4742918610572815, "learning_rate": 7.55955955955956e-05, "loss": 1.2804, "step": 3113 }, { "epoch": 0.4578570115787539, "grad_norm": 0.4911901652812958, "learning_rate": 7.555555555555556e-05, "loss": 1.4077, "step": 3114 }, { "epoch": 0.4580040433743797, "grad_norm": 0.5317238569259644, "learning_rate": 7.551551551551553e-05, "loss": 1.4408, "step": 3115 }, { "epoch": 0.45815107517000553, "grad_norm": 0.3974837362766266, "learning_rate": 7.547547547547547e-05, "loss": 0.8269, "step": 3116 }, { "epoch": 0.45829810696563134, "grad_norm": 0.5316552519798279, "learning_rate": 7.543543543543544e-05, "loss": 0.9533, "step": 3117 }, { "epoch": 0.45844513876125714, "grad_norm": 0.5243070721626282, "learning_rate": 7.53953953953954e-05, "loss": 1.0309, "step": 3118 }, { "epoch": 0.45859217055688295, "grad_norm": 0.47847214341163635, "learning_rate": 7.535535535535536e-05, "loss": 1.0019, "step": 3119 }, { "epoch": 0.45873920235250876, "grad_norm": 0.5561521053314209, "learning_rate": 7.531531531531531e-05, "loss": 1.4231, "step": 3120 }, { "epoch": 0.4588862341481345, "grad_norm": 0.423222154378891, "learning_rate": 7.527527527527528e-05, "loss": 1.2859, "step": 3121 }, { "epoch": 0.4590332659437603, "grad_norm": 0.6410842537879944, "learning_rate": 7.523523523523523e-05, "loss": 1.2076, "step": 3122 }, { "epoch": 0.4591802977393861, "grad_norm": 0.7242981195449829, "learning_rate": 7.51951951951952e-05, "loss": 1.2871, "step": 3123 }, { "epoch": 0.45932732953501193, "grad_norm": 0.713115394115448, "learning_rate": 7.515515515515516e-05, "loss": 1.5349, "step": 3124 }, { "epoch": 0.45947436133063774, "grad_norm": 0.5140429735183716, "learning_rate": 7.511511511511511e-05, "loss": 1.0474, "step": 3125 }, { "epoch": 0.45962139312626354, "grad_norm": 0.6369380950927734, "learning_rate": 7.507507507507507e-05, "loss": 1.2973, "step": 3126 }, { "epoch": 0.45976842492188935, "grad_norm": 0.4863113462924957, "learning_rate": 7.503503503503504e-05, "loss": 1.3063, "step": 3127 }, { "epoch": 0.45991545671751516, "grad_norm": 0.5544506907463074, "learning_rate": 7.4994994994995e-05, "loss": 1.2705, "step": 3128 }, { "epoch": 0.46006248851314097, "grad_norm": 0.5823478102684021, "learning_rate": 7.495495495495496e-05, "loss": 1.3866, "step": 3129 }, { "epoch": 0.4602095203087668, "grad_norm": 0.5914391875267029, "learning_rate": 7.491491491491491e-05, "loss": 1.3032, "step": 3130 }, { "epoch": 0.4603565521043926, "grad_norm": 0.6319800615310669, "learning_rate": 7.487487487487487e-05, "loss": 1.2006, "step": 3131 }, { "epoch": 0.4605035839000184, "grad_norm": 0.7106775641441345, "learning_rate": 7.483483483483484e-05, "loss": 1.1899, "step": 3132 }, { "epoch": 0.4606506156956442, "grad_norm": 0.558309018611908, "learning_rate": 7.47947947947948e-05, "loss": 1.2956, "step": 3133 }, { "epoch": 0.46079764749127, "grad_norm": 0.5816996097564697, "learning_rate": 7.475475475475476e-05, "loss": 1.2326, "step": 3134 }, { "epoch": 0.4609446792868958, "grad_norm": 0.7314457893371582, "learning_rate": 7.471471471471471e-05, "loss": 1.3747, "step": 3135 }, { "epoch": 0.4610917110825216, "grad_norm": 0.7735886573791504, "learning_rate": 7.467467467467469e-05, "loss": 0.9986, "step": 3136 }, { "epoch": 0.4612387428781474, "grad_norm": 0.5291430950164795, "learning_rate": 7.463463463463464e-05, "loss": 1.3356, "step": 3137 }, { "epoch": 0.46138577467377323, "grad_norm": 0.5009344220161438, "learning_rate": 7.45945945945946e-05, "loss": 1.3914, "step": 3138 }, { "epoch": 0.461532806469399, "grad_norm": 0.6736690998077393, "learning_rate": 7.455455455455456e-05, "loss": 1.251, "step": 3139 }, { "epoch": 0.4616798382650248, "grad_norm": 0.5644371509552002, "learning_rate": 7.451451451451452e-05, "loss": 1.2718, "step": 3140 }, { "epoch": 0.4618268700606506, "grad_norm": 0.6560328006744385, "learning_rate": 7.447447447447447e-05, "loss": 1.1835, "step": 3141 }, { "epoch": 0.4619739018562764, "grad_norm": 0.5204223990440369, "learning_rate": 7.443443443443444e-05, "loss": 1.1754, "step": 3142 }, { "epoch": 0.4621209336519022, "grad_norm": 0.513964831829071, "learning_rate": 7.43943943943944e-05, "loss": 1.0435, "step": 3143 }, { "epoch": 0.462267965447528, "grad_norm": 0.7342390418052673, "learning_rate": 7.435435435435436e-05, "loss": 0.9614, "step": 3144 }, { "epoch": 0.4624149972431538, "grad_norm": 0.6051604151725769, "learning_rate": 7.431431431431432e-05, "loss": 1.2794, "step": 3145 }, { "epoch": 0.46256202903877963, "grad_norm": 0.4730220437049866, "learning_rate": 7.427427427427429e-05, "loss": 1.6719, "step": 3146 }, { "epoch": 0.46270906083440544, "grad_norm": 0.7216799259185791, "learning_rate": 7.423423423423423e-05, "loss": 1.0916, "step": 3147 }, { "epoch": 0.46285609263003125, "grad_norm": 0.5310493111610413, "learning_rate": 7.41941941941942e-05, "loss": 1.2615, "step": 3148 }, { "epoch": 0.46300312442565705, "grad_norm": 0.7141684889793396, "learning_rate": 7.415415415415416e-05, "loss": 1.5487, "step": 3149 }, { "epoch": 0.46315015622128286, "grad_norm": 0.6389233469963074, "learning_rate": 7.411411411411412e-05, "loss": 1.3036, "step": 3150 }, { "epoch": 0.46329718801690867, "grad_norm": 0.8041030764579773, "learning_rate": 7.407407407407407e-05, "loss": 1.2785, "step": 3151 }, { "epoch": 0.4634442198125345, "grad_norm": 0.76786869764328, "learning_rate": 7.403403403403404e-05, "loss": 1.144, "step": 3152 }, { "epoch": 0.4635912516081603, "grad_norm": 0.5625277757644653, "learning_rate": 7.399399399399399e-05, "loss": 1.2234, "step": 3153 }, { "epoch": 0.4637382834037861, "grad_norm": 0.6522327065467834, "learning_rate": 7.395395395395396e-05, "loss": 0.9828, "step": 3154 }, { "epoch": 0.4638853151994119, "grad_norm": 0.5087584257125854, "learning_rate": 7.391391391391392e-05, "loss": 1.3631, "step": 3155 }, { "epoch": 0.4640323469950377, "grad_norm": 0.7143625617027283, "learning_rate": 7.387387387387387e-05, "loss": 1.0932, "step": 3156 }, { "epoch": 0.46417937879066345, "grad_norm": 0.622378945350647, "learning_rate": 7.383383383383383e-05, "loss": 1.2341, "step": 3157 }, { "epoch": 0.46432641058628926, "grad_norm": 0.44317391514778137, "learning_rate": 7.37937937937938e-05, "loss": 1.3132, "step": 3158 }, { "epoch": 0.46447344238191507, "grad_norm": 0.553407609462738, "learning_rate": 7.375375375375376e-05, "loss": 1.2704, "step": 3159 }, { "epoch": 0.4646204741775409, "grad_norm": 0.5492808818817139, "learning_rate": 7.371371371371372e-05, "loss": 1.0015, "step": 3160 }, { "epoch": 0.4647675059731667, "grad_norm": 0.5664077997207642, "learning_rate": 7.367367367367369e-05, "loss": 1.386, "step": 3161 }, { "epoch": 0.4649145377687925, "grad_norm": 0.5891731381416321, "learning_rate": 7.363363363363363e-05, "loss": 1.3508, "step": 3162 }, { "epoch": 0.4650615695644183, "grad_norm": 0.5016058087348938, "learning_rate": 7.35935935935936e-05, "loss": 1.1335, "step": 3163 }, { "epoch": 0.4652086013600441, "grad_norm": 0.5653664469718933, "learning_rate": 7.355355355355356e-05, "loss": 1.3224, "step": 3164 }, { "epoch": 0.4653556331556699, "grad_norm": 0.6252666115760803, "learning_rate": 7.351351351351352e-05, "loss": 1.7772, "step": 3165 }, { "epoch": 0.4655026649512957, "grad_norm": 0.4957193434238434, "learning_rate": 7.347347347347347e-05, "loss": 1.4306, "step": 3166 }, { "epoch": 0.4656496967469215, "grad_norm": 0.5668092966079712, "learning_rate": 7.343343343343344e-05, "loss": 0.988, "step": 3167 }, { "epoch": 0.46579672854254733, "grad_norm": 0.8000352382659912, "learning_rate": 7.339339339339339e-05, "loss": 1.0514, "step": 3168 }, { "epoch": 0.46594376033817314, "grad_norm": 0.5491058826446533, "learning_rate": 7.335335335335336e-05, "loss": 1.2817, "step": 3169 }, { "epoch": 0.46609079213379895, "grad_norm": 0.4835140109062195, "learning_rate": 7.331331331331332e-05, "loss": 1.4577, "step": 3170 }, { "epoch": 0.46623782392942476, "grad_norm": 0.6204507350921631, "learning_rate": 7.327327327327327e-05, "loss": 1.0923, "step": 3171 }, { "epoch": 0.46638485572505056, "grad_norm": 0.522095263004303, "learning_rate": 7.323323323323323e-05, "loss": 1.2373, "step": 3172 }, { "epoch": 0.46653188752067637, "grad_norm": 0.5510648488998413, "learning_rate": 7.31931931931932e-05, "loss": 1.1671, "step": 3173 }, { "epoch": 0.4666789193163022, "grad_norm": 0.6043539643287659, "learning_rate": 7.315315315315316e-05, "loss": 1.2634, "step": 3174 }, { "epoch": 0.46682595111192793, "grad_norm": 0.447717547416687, "learning_rate": 7.311311311311312e-05, "loss": 1.4052, "step": 3175 }, { "epoch": 0.46697298290755374, "grad_norm": 0.8987585306167603, "learning_rate": 7.307307307307307e-05, "loss": 1.2323, "step": 3176 }, { "epoch": 0.46712001470317954, "grad_norm": 0.4988643527030945, "learning_rate": 7.303303303303303e-05, "loss": 1.1818, "step": 3177 }, { "epoch": 0.46726704649880535, "grad_norm": 0.4966548681259155, "learning_rate": 7.299299299299299e-05, "loss": 1.4234, "step": 3178 }, { "epoch": 0.46741407829443116, "grad_norm": 0.5231966376304626, "learning_rate": 7.295295295295296e-05, "loss": 1.009, "step": 3179 }, { "epoch": 0.46756111009005696, "grad_norm": 0.604512631893158, "learning_rate": 7.291291291291292e-05, "loss": 1.1619, "step": 3180 }, { "epoch": 0.46770814188568277, "grad_norm": 0.719962477684021, "learning_rate": 7.287287287287287e-05, "loss": 0.9192, "step": 3181 }, { "epoch": 0.4678551736813086, "grad_norm": 0.41954106092453003, "learning_rate": 7.283283283283283e-05, "loss": 1.5271, "step": 3182 }, { "epoch": 0.4680022054769344, "grad_norm": 0.559506893157959, "learning_rate": 7.27927927927928e-05, "loss": 1.2804, "step": 3183 }, { "epoch": 0.4681492372725602, "grad_norm": 0.5913854241371155, "learning_rate": 7.275275275275275e-05, "loss": 1.1265, "step": 3184 }, { "epoch": 0.468296269068186, "grad_norm": 0.7071521282196045, "learning_rate": 7.271271271271272e-05, "loss": 0.9495, "step": 3185 }, { "epoch": 0.4684433008638118, "grad_norm": 0.6973623633384705, "learning_rate": 7.267267267267268e-05, "loss": 0.9897, "step": 3186 }, { "epoch": 0.4685903326594376, "grad_norm": 0.5952861309051514, "learning_rate": 7.263263263263263e-05, "loss": 1.3972, "step": 3187 }, { "epoch": 0.4687373644550634, "grad_norm": 0.5680931210517883, "learning_rate": 7.25925925925926e-05, "loss": 1.4581, "step": 3188 }, { "epoch": 0.46888439625068923, "grad_norm": 0.7241812944412231, "learning_rate": 7.255255255255256e-05, "loss": 0.8892, "step": 3189 }, { "epoch": 0.46903142804631504, "grad_norm": 0.6984421610832214, "learning_rate": 7.251251251251252e-05, "loss": 1.145, "step": 3190 }, { "epoch": 0.46917845984194084, "grad_norm": 0.6072508692741394, "learning_rate": 7.247247247247248e-05, "loss": 1.1985, "step": 3191 }, { "epoch": 0.46932549163756665, "grad_norm": 0.6277791857719421, "learning_rate": 7.243243243243245e-05, "loss": 1.3544, "step": 3192 }, { "epoch": 0.4694725234331924, "grad_norm": 0.5972728729248047, "learning_rate": 7.239239239239239e-05, "loss": 1.1753, "step": 3193 }, { "epoch": 0.4696195552288182, "grad_norm": 0.6507230401039124, "learning_rate": 7.235235235235236e-05, "loss": 0.9752, "step": 3194 }, { "epoch": 0.469766587024444, "grad_norm": 0.3800307810306549, "learning_rate": 7.231231231231232e-05, "loss": 1.5189, "step": 3195 }, { "epoch": 0.4699136188200698, "grad_norm": 0.6721126437187195, "learning_rate": 7.227227227227228e-05, "loss": 1.1514, "step": 3196 }, { "epoch": 0.47006065061569563, "grad_norm": 0.5962321162223816, "learning_rate": 7.223223223223223e-05, "loss": 1.1173, "step": 3197 }, { "epoch": 0.47020768241132144, "grad_norm": 0.6528775095939636, "learning_rate": 7.21921921921922e-05, "loss": 1.4845, "step": 3198 }, { "epoch": 0.47035471420694724, "grad_norm": 0.7845389246940613, "learning_rate": 7.215215215215215e-05, "loss": 0.8664, "step": 3199 }, { "epoch": 0.47050174600257305, "grad_norm": 0.6082313060760498, "learning_rate": 7.211211211211212e-05, "loss": 1.198, "step": 3200 }, { "epoch": 0.47064877779819886, "grad_norm": 0.5247923135757446, "learning_rate": 7.207207207207208e-05, "loss": 1.8579, "step": 3201 }, { "epoch": 0.47079580959382467, "grad_norm": 0.5476462244987488, "learning_rate": 7.203203203203203e-05, "loss": 1.1089, "step": 3202 }, { "epoch": 0.4709428413894505, "grad_norm": 0.5576204657554626, "learning_rate": 7.199199199199199e-05, "loss": 1.3273, "step": 3203 }, { "epoch": 0.4710898731850763, "grad_norm": 0.5106037855148315, "learning_rate": 7.195195195195196e-05, "loss": 0.8588, "step": 3204 }, { "epoch": 0.4712369049807021, "grad_norm": 0.7927364706993103, "learning_rate": 7.19119119119119e-05, "loss": 0.9161, "step": 3205 }, { "epoch": 0.4713839367763279, "grad_norm": 0.4807494878768921, "learning_rate": 7.187187187187188e-05, "loss": 1.3247, "step": 3206 }, { "epoch": 0.4715309685719537, "grad_norm": 0.6596494317054749, "learning_rate": 7.183183183183183e-05, "loss": 1.0688, "step": 3207 }, { "epoch": 0.4716780003675795, "grad_norm": 0.582834005355835, "learning_rate": 7.179179179179179e-05, "loss": 1.4247, "step": 3208 }, { "epoch": 0.4718250321632053, "grad_norm": 0.648859441280365, "learning_rate": 7.175175175175175e-05, "loss": 1.4218, "step": 3209 }, { "epoch": 0.4719720639588311, "grad_norm": 0.7592419385910034, "learning_rate": 7.171171171171172e-05, "loss": 1.3742, "step": 3210 }, { "epoch": 0.4721190957544569, "grad_norm": 0.7208430171012878, "learning_rate": 7.167167167167166e-05, "loss": 1.173, "step": 3211 }, { "epoch": 0.4722661275500827, "grad_norm": 0.5124316215515137, "learning_rate": 7.163163163163163e-05, "loss": 1.2339, "step": 3212 }, { "epoch": 0.4724131593457085, "grad_norm": 0.5721129775047302, "learning_rate": 7.15915915915916e-05, "loss": 1.0254, "step": 3213 }, { "epoch": 0.4725601911413343, "grad_norm": 0.5043213963508606, "learning_rate": 7.155155155155155e-05, "loss": 1.582, "step": 3214 }, { "epoch": 0.4727072229369601, "grad_norm": 0.7064675092697144, "learning_rate": 7.151151151151152e-05, "loss": 1.1194, "step": 3215 }, { "epoch": 0.4728542547325859, "grad_norm": 0.6035963296890259, "learning_rate": 7.147147147147148e-05, "loss": 1.2392, "step": 3216 }, { "epoch": 0.4730012865282117, "grad_norm": 0.5279290676116943, "learning_rate": 7.143143143143143e-05, "loss": 1.2897, "step": 3217 }, { "epoch": 0.4731483183238375, "grad_norm": 0.45839574933052063, "learning_rate": 7.139139139139139e-05, "loss": 1.4768, "step": 3218 }, { "epoch": 0.47329535011946333, "grad_norm": 0.5507638454437256, "learning_rate": 7.135135135135136e-05, "loss": 1.0862, "step": 3219 }, { "epoch": 0.47344238191508914, "grad_norm": 0.6731868386268616, "learning_rate": 7.131131131131132e-05, "loss": 1.062, "step": 3220 }, { "epoch": 0.47358941371071495, "grad_norm": 0.5090354681015015, "learning_rate": 7.127127127127128e-05, "loss": 1.3862, "step": 3221 }, { "epoch": 0.47373644550634075, "grad_norm": 0.5207750201225281, "learning_rate": 7.123123123123123e-05, "loss": 0.9856, "step": 3222 }, { "epoch": 0.47388347730196656, "grad_norm": 0.48361724615097046, "learning_rate": 7.119119119119119e-05, "loss": 1.3241, "step": 3223 }, { "epoch": 0.47403050909759237, "grad_norm": 0.7576636075973511, "learning_rate": 7.115115115115115e-05, "loss": 1.1382, "step": 3224 }, { "epoch": 0.4741775408932182, "grad_norm": 0.4368208348751068, "learning_rate": 7.111111111111112e-05, "loss": 1.4422, "step": 3225 }, { "epoch": 0.474324572688844, "grad_norm": 0.41935113072395325, "learning_rate": 7.107107107107108e-05, "loss": 1.3364, "step": 3226 }, { "epoch": 0.4744716044844698, "grad_norm": 0.4772408902645111, "learning_rate": 7.103103103103103e-05, "loss": 1.5426, "step": 3227 }, { "epoch": 0.4746186362800956, "grad_norm": 0.6283818483352661, "learning_rate": 7.099099099099099e-05, "loss": 1.1303, "step": 3228 }, { "epoch": 0.47476566807572135, "grad_norm": 0.5410350561141968, "learning_rate": 7.095095095095096e-05, "loss": 1.437, "step": 3229 }, { "epoch": 0.47491269987134715, "grad_norm": 0.5475489497184753, "learning_rate": 7.09109109109109e-05, "loss": 1.0147, "step": 3230 }, { "epoch": 0.47505973166697296, "grad_norm": 0.6833570003509521, "learning_rate": 7.087087087087088e-05, "loss": 1.4046, "step": 3231 }, { "epoch": 0.47520676346259877, "grad_norm": 0.6560105681419373, "learning_rate": 7.083083083083083e-05, "loss": 1.1864, "step": 3232 }, { "epoch": 0.4753537952582246, "grad_norm": 0.5781462788581848, "learning_rate": 7.079079079079079e-05, "loss": 1.2459, "step": 3233 }, { "epoch": 0.4755008270538504, "grad_norm": 0.6918640732765198, "learning_rate": 7.075075075075075e-05, "loss": 0.9522, "step": 3234 }, { "epoch": 0.4756478588494762, "grad_norm": 0.6307013630867004, "learning_rate": 7.071071071071072e-05, "loss": 1.1081, "step": 3235 }, { "epoch": 0.475794890645102, "grad_norm": 0.6298636198043823, "learning_rate": 7.067067067067066e-05, "loss": 1.0651, "step": 3236 }, { "epoch": 0.4759419224407278, "grad_norm": 0.6778161525726318, "learning_rate": 7.063063063063064e-05, "loss": 1.1546, "step": 3237 }, { "epoch": 0.4760889542363536, "grad_norm": 0.5660976767539978, "learning_rate": 7.059059059059059e-05, "loss": 1.5648, "step": 3238 }, { "epoch": 0.4762359860319794, "grad_norm": 0.5716121196746826, "learning_rate": 7.055055055055055e-05, "loss": 1.1623, "step": 3239 }, { "epoch": 0.4763830178276052, "grad_norm": 0.5568304061889648, "learning_rate": 7.051051051051052e-05, "loss": 0.8338, "step": 3240 }, { "epoch": 0.47653004962323103, "grad_norm": 0.6741575002670288, "learning_rate": 7.047047047047048e-05, "loss": 1.0937, "step": 3241 }, { "epoch": 0.47667708141885684, "grad_norm": 0.6590014696121216, "learning_rate": 7.043043043043044e-05, "loss": 1.2184, "step": 3242 }, { "epoch": 0.47682411321448265, "grad_norm": 0.5639827251434326, "learning_rate": 7.039039039039039e-05, "loss": 1.0126, "step": 3243 }, { "epoch": 0.47697114501010845, "grad_norm": 0.47901928424835205, "learning_rate": 7.035035035035036e-05, "loss": 1.7836, "step": 3244 }, { "epoch": 0.47711817680573426, "grad_norm": 0.4116479456424713, "learning_rate": 7.031031031031031e-05, "loss": 1.2165, "step": 3245 }, { "epoch": 0.47726520860136007, "grad_norm": 0.6687740087509155, "learning_rate": 7.027027027027028e-05, "loss": 0.9399, "step": 3246 }, { "epoch": 0.4774122403969858, "grad_norm": 0.6609377861022949, "learning_rate": 7.023023023023024e-05, "loss": 1.2119, "step": 3247 }, { "epoch": 0.47755927219261163, "grad_norm": 0.46638748049736023, "learning_rate": 7.019019019019019e-05, "loss": 1.6239, "step": 3248 }, { "epoch": 0.47770630398823744, "grad_norm": 0.5504456162452698, "learning_rate": 7.015015015015015e-05, "loss": 1.6431, "step": 3249 }, { "epoch": 0.47785333578386324, "grad_norm": 0.5258440971374512, "learning_rate": 7.011011011011012e-05, "loss": 1.0217, "step": 3250 }, { "epoch": 0.47800036757948905, "grad_norm": 0.6411320567131042, "learning_rate": 7.007007007007007e-05, "loss": 0.9529, "step": 3251 }, { "epoch": 0.47814739937511486, "grad_norm": 0.5060096383094788, "learning_rate": 7.003003003003004e-05, "loss": 0.8755, "step": 3252 }, { "epoch": 0.47829443117074066, "grad_norm": 0.5997748374938965, "learning_rate": 6.998998998999e-05, "loss": 1.053, "step": 3253 }, { "epoch": 0.47844146296636647, "grad_norm": 0.555962324142456, "learning_rate": 6.994994994994995e-05, "loss": 1.1308, "step": 3254 }, { "epoch": 0.4785884947619923, "grad_norm": 0.6625096797943115, "learning_rate": 6.990990990990991e-05, "loss": 1.1652, "step": 3255 }, { "epoch": 0.4787355265576181, "grad_norm": 0.43617936968803406, "learning_rate": 6.986986986986988e-05, "loss": 1.6405, "step": 3256 }, { "epoch": 0.4788825583532439, "grad_norm": 0.5517098307609558, "learning_rate": 6.982982982982982e-05, "loss": 1.0279, "step": 3257 }, { "epoch": 0.4790295901488697, "grad_norm": 0.6235120296478271, "learning_rate": 6.97897897897898e-05, "loss": 1.0911, "step": 3258 }, { "epoch": 0.4791766219444955, "grad_norm": 0.6277541518211365, "learning_rate": 6.974974974974975e-05, "loss": 1.1581, "step": 3259 }, { "epoch": 0.4793236537401213, "grad_norm": 0.4934016764163971, "learning_rate": 6.970970970970971e-05, "loss": 1.2856, "step": 3260 }, { "epoch": 0.4794706855357471, "grad_norm": 0.8197907209396362, "learning_rate": 6.966966966966967e-05, "loss": 1.1003, "step": 3261 }, { "epoch": 0.47961771733137293, "grad_norm": 0.8218384981155396, "learning_rate": 6.962962962962964e-05, "loss": 1.0153, "step": 3262 }, { "epoch": 0.47976474912699874, "grad_norm": 0.3828998804092407, "learning_rate": 6.95895895895896e-05, "loss": 1.9459, "step": 3263 }, { "epoch": 0.47991178092262454, "grad_norm": 0.44151973724365234, "learning_rate": 6.954954954954955e-05, "loss": 1.1069, "step": 3264 }, { "epoch": 0.4800588127182503, "grad_norm": 0.6370716094970703, "learning_rate": 6.950950950950952e-05, "loss": 1.3989, "step": 3265 }, { "epoch": 0.4802058445138761, "grad_norm": 0.7442599534988403, "learning_rate": 6.946946946946947e-05, "loss": 0.9942, "step": 3266 }, { "epoch": 0.4803528763095019, "grad_norm": 0.7800948023796082, "learning_rate": 6.942942942942944e-05, "loss": 1.1109, "step": 3267 }, { "epoch": 0.4804999081051277, "grad_norm": 0.7671271562576294, "learning_rate": 6.93893893893894e-05, "loss": 0.9661, "step": 3268 }, { "epoch": 0.4806469399007535, "grad_norm": 0.611117959022522, "learning_rate": 6.934934934934935e-05, "loss": 0.9957, "step": 3269 }, { "epoch": 0.48079397169637933, "grad_norm": 0.7372732758522034, "learning_rate": 6.930930930930931e-05, "loss": 1.139, "step": 3270 }, { "epoch": 0.48094100349200514, "grad_norm": 0.5962196588516235, "learning_rate": 6.926926926926928e-05, "loss": 1.2346, "step": 3271 }, { "epoch": 0.48108803528763094, "grad_norm": 0.6650049090385437, "learning_rate": 6.922922922922924e-05, "loss": 0.9899, "step": 3272 }, { "epoch": 0.48123506708325675, "grad_norm": 0.5872532725334167, "learning_rate": 6.91891891891892e-05, "loss": 1.3773, "step": 3273 }, { "epoch": 0.48138209887888256, "grad_norm": 0.41576826572418213, "learning_rate": 6.914914914914915e-05, "loss": 1.7622, "step": 3274 }, { "epoch": 0.48152913067450837, "grad_norm": 0.5065469145774841, "learning_rate": 6.910910910910911e-05, "loss": 1.7142, "step": 3275 }, { "epoch": 0.4816761624701342, "grad_norm": 0.5180824995040894, "learning_rate": 6.906906906906907e-05, "loss": 1.0072, "step": 3276 }, { "epoch": 0.48182319426576, "grad_norm": 0.6505728960037231, "learning_rate": 6.902902902902904e-05, "loss": 1.0646, "step": 3277 }, { "epoch": 0.4819702260613858, "grad_norm": 0.5568881630897522, "learning_rate": 6.8988988988989e-05, "loss": 1.039, "step": 3278 }, { "epoch": 0.4821172578570116, "grad_norm": 0.5079647898674011, "learning_rate": 6.894894894894895e-05, "loss": 0.9934, "step": 3279 }, { "epoch": 0.4822642896526374, "grad_norm": 0.5787253975868225, "learning_rate": 6.890890890890891e-05, "loss": 1.0258, "step": 3280 }, { "epoch": 0.4824113214482632, "grad_norm": 0.7626857161521912, "learning_rate": 6.886886886886888e-05, "loss": 1.1546, "step": 3281 }, { "epoch": 0.482558353243889, "grad_norm": 0.5568035840988159, "learning_rate": 6.882882882882882e-05, "loss": 1.0485, "step": 3282 }, { "epoch": 0.4827053850395148, "grad_norm": 0.7178840041160583, "learning_rate": 6.87887887887888e-05, "loss": 1.2585, "step": 3283 }, { "epoch": 0.4828524168351406, "grad_norm": 0.6728935241699219, "learning_rate": 6.874874874874875e-05, "loss": 1.0297, "step": 3284 }, { "epoch": 0.4829994486307664, "grad_norm": 0.5099184513092041, "learning_rate": 6.870870870870871e-05, "loss": 1.1726, "step": 3285 }, { "epoch": 0.4831464804263922, "grad_norm": 0.5514228343963623, "learning_rate": 6.866866866866867e-05, "loss": 0.9795, "step": 3286 }, { "epoch": 0.483293512222018, "grad_norm": 0.5860190987586975, "learning_rate": 6.862862862862864e-05, "loss": 1.0831, "step": 3287 }, { "epoch": 0.4834405440176438, "grad_norm": 0.5907740592956543, "learning_rate": 6.858858858858858e-05, "loss": 1.0128, "step": 3288 }, { "epoch": 0.4835875758132696, "grad_norm": 0.5860800743103027, "learning_rate": 6.854854854854855e-05, "loss": 1.5108, "step": 3289 }, { "epoch": 0.4837346076088954, "grad_norm": 0.48073041439056396, "learning_rate": 6.850850850850851e-05, "loss": 1.3836, "step": 3290 }, { "epoch": 0.4838816394045212, "grad_norm": 0.5692874789237976, "learning_rate": 6.846846846846847e-05, "loss": 0.9091, "step": 3291 }, { "epoch": 0.48402867120014703, "grad_norm": 0.49851879477500916, "learning_rate": 6.842842842842844e-05, "loss": 1.3859, "step": 3292 }, { "epoch": 0.48417570299577284, "grad_norm": 0.5209751725196838, "learning_rate": 6.83883883883884e-05, "loss": 1.2803, "step": 3293 }, { "epoch": 0.48432273479139865, "grad_norm": 0.48093143105506897, "learning_rate": 6.834834834834835e-05, "loss": 1.0768, "step": 3294 }, { "epoch": 0.48446976658702445, "grad_norm": 0.6140349507331848, "learning_rate": 6.830830830830831e-05, "loss": 1.1953, "step": 3295 }, { "epoch": 0.48461679838265026, "grad_norm": 0.6102420091629028, "learning_rate": 6.826826826826828e-05, "loss": 1.2083, "step": 3296 }, { "epoch": 0.48476383017827607, "grad_norm": 0.48081493377685547, "learning_rate": 6.822822822822822e-05, "loss": 1.5248, "step": 3297 }, { "epoch": 0.4849108619739019, "grad_norm": 0.5199002623558044, "learning_rate": 6.81881881881882e-05, "loss": 1.1074, "step": 3298 }, { "epoch": 0.4850578937695277, "grad_norm": 0.6573747396469116, "learning_rate": 6.814814814814815e-05, "loss": 0.9098, "step": 3299 }, { "epoch": 0.4852049255651535, "grad_norm": 0.6496981382369995, "learning_rate": 6.810810810810811e-05, "loss": 1.188, "step": 3300 }, { "epoch": 0.4853519573607793, "grad_norm": 0.6297982335090637, "learning_rate": 6.806806806806807e-05, "loss": 1.3595, "step": 3301 }, { "epoch": 0.48549898915640505, "grad_norm": 0.47232648730278015, "learning_rate": 6.802802802802804e-05, "loss": 0.9832, "step": 3302 }, { "epoch": 0.48564602095203085, "grad_norm": 0.6719629764556885, "learning_rate": 6.798798798798798e-05, "loss": 1.0843, "step": 3303 }, { "epoch": 0.48579305274765666, "grad_norm": 0.48230165243148804, "learning_rate": 6.794794794794795e-05, "loss": 1.6386, "step": 3304 }, { "epoch": 0.48594008454328247, "grad_norm": 0.7190559506416321, "learning_rate": 6.790790790790791e-05, "loss": 1.2447, "step": 3305 }, { "epoch": 0.4860871163389083, "grad_norm": 0.5606859922409058, "learning_rate": 6.786786786786787e-05, "loss": 1.5175, "step": 3306 }, { "epoch": 0.4862341481345341, "grad_norm": 0.7099050879478455, "learning_rate": 6.782782782782783e-05, "loss": 0.935, "step": 3307 }, { "epoch": 0.4863811799301599, "grad_norm": 0.5901654958724976, "learning_rate": 6.77877877877878e-05, "loss": 1.3616, "step": 3308 }, { "epoch": 0.4865282117257857, "grad_norm": 0.6656291484832764, "learning_rate": 6.774774774774775e-05, "loss": 1.1265, "step": 3309 }, { "epoch": 0.4866752435214115, "grad_norm": 0.49714264273643494, "learning_rate": 6.770770770770771e-05, "loss": 1.3588, "step": 3310 }, { "epoch": 0.4868222753170373, "grad_norm": 0.49511489272117615, "learning_rate": 6.766766766766767e-05, "loss": 1.644, "step": 3311 }, { "epoch": 0.4869693071126631, "grad_norm": 0.5144782066345215, "learning_rate": 6.762762762762763e-05, "loss": 1.4862, "step": 3312 }, { "epoch": 0.4871163389082889, "grad_norm": 0.46201932430267334, "learning_rate": 6.758758758758758e-05, "loss": 1.5959, "step": 3313 }, { "epoch": 0.48726337070391473, "grad_norm": 0.40428048372268677, "learning_rate": 6.754754754754755e-05, "loss": 1.3271, "step": 3314 }, { "epoch": 0.48741040249954054, "grad_norm": 0.7480417490005493, "learning_rate": 6.750750750750751e-05, "loss": 0.9003, "step": 3315 }, { "epoch": 0.48755743429516635, "grad_norm": 0.56517094373703, "learning_rate": 6.746746746746747e-05, "loss": 1.1447, "step": 3316 }, { "epoch": 0.48770446609079215, "grad_norm": 0.6307684779167175, "learning_rate": 6.742742742742743e-05, "loss": 0.9576, "step": 3317 }, { "epoch": 0.48785149788641796, "grad_norm": 0.6423370242118835, "learning_rate": 6.73873873873874e-05, "loss": 1.3298, "step": 3318 }, { "epoch": 0.48799852968204377, "grad_norm": 0.44675466418266296, "learning_rate": 6.734734734734735e-05, "loss": 1.4309, "step": 3319 }, { "epoch": 0.4881455614776695, "grad_norm": 0.6233170032501221, "learning_rate": 6.730730730730731e-05, "loss": 1.0473, "step": 3320 }, { "epoch": 0.48829259327329533, "grad_norm": 0.6302303075790405, "learning_rate": 6.726726726726727e-05, "loss": 1.2033, "step": 3321 }, { "epoch": 0.48843962506892113, "grad_norm": 0.4309339225292206, "learning_rate": 6.722722722722723e-05, "loss": 1.3139, "step": 3322 }, { "epoch": 0.48858665686454694, "grad_norm": 0.5400303602218628, "learning_rate": 6.71871871871872e-05, "loss": 1.1697, "step": 3323 }, { "epoch": 0.48873368866017275, "grad_norm": 0.603607714176178, "learning_rate": 6.714714714714715e-05, "loss": 1.0623, "step": 3324 }, { "epoch": 0.48888072045579856, "grad_norm": 0.6371999979019165, "learning_rate": 6.710710710710711e-05, "loss": 1.0855, "step": 3325 }, { "epoch": 0.48902775225142436, "grad_norm": 0.5177183151245117, "learning_rate": 6.706706706706707e-05, "loss": 1.2566, "step": 3326 }, { "epoch": 0.48917478404705017, "grad_norm": 0.6910794377326965, "learning_rate": 6.702702702702704e-05, "loss": 0.9662, "step": 3327 }, { "epoch": 0.489321815842676, "grad_norm": 0.464193731546402, "learning_rate": 6.698698698698698e-05, "loss": 1.3604, "step": 3328 }, { "epoch": 0.4894688476383018, "grad_norm": 0.642329216003418, "learning_rate": 6.694694694694695e-05, "loss": 1.3321, "step": 3329 }, { "epoch": 0.4896158794339276, "grad_norm": 0.6557053327560425, "learning_rate": 6.690690690690691e-05, "loss": 1.0816, "step": 3330 }, { "epoch": 0.4897629112295534, "grad_norm": 0.6464946269989014, "learning_rate": 6.686686686686687e-05, "loss": 1.197, "step": 3331 }, { "epoch": 0.4899099430251792, "grad_norm": 0.6376494765281677, "learning_rate": 6.682682682682683e-05, "loss": 1.3087, "step": 3332 }, { "epoch": 0.490056974820805, "grad_norm": 0.5340996980667114, "learning_rate": 6.67867867867868e-05, "loss": 1.0811, "step": 3333 }, { "epoch": 0.4902040066164308, "grad_norm": 0.6553524136543274, "learning_rate": 6.674674674674674e-05, "loss": 1.1828, "step": 3334 }, { "epoch": 0.49035103841205663, "grad_norm": 0.7293433547019958, "learning_rate": 6.670670670670671e-05, "loss": 1.1346, "step": 3335 }, { "epoch": 0.49049807020768244, "grad_norm": 0.47143951058387756, "learning_rate": 6.666666666666667e-05, "loss": 0.9469, "step": 3336 }, { "epoch": 0.49064510200330824, "grad_norm": 0.5818648934364319, "learning_rate": 6.662662662662663e-05, "loss": 0.9733, "step": 3337 }, { "epoch": 0.490792133798934, "grad_norm": 0.5451882481575012, "learning_rate": 6.658658658658658e-05, "loss": 0.5997, "step": 3338 }, { "epoch": 0.4909391655945598, "grad_norm": 0.4571952223777771, "learning_rate": 6.654654654654656e-05, "loss": 1.4245, "step": 3339 }, { "epoch": 0.4910861973901856, "grad_norm": 0.5968004465103149, "learning_rate": 6.65065065065065e-05, "loss": 0.9663, "step": 3340 }, { "epoch": 0.4912332291858114, "grad_norm": 0.6068567037582397, "learning_rate": 6.646646646646647e-05, "loss": 0.9906, "step": 3341 }, { "epoch": 0.4913802609814372, "grad_norm": 0.48862892389297485, "learning_rate": 6.642642642642643e-05, "loss": 1.5144, "step": 3342 }, { "epoch": 0.49152729277706303, "grad_norm": 0.7602874040603638, "learning_rate": 6.638638638638638e-05, "loss": 0.794, "step": 3343 }, { "epoch": 0.49167432457268884, "grad_norm": 0.6745301485061646, "learning_rate": 6.634634634634634e-05, "loss": 1.1612, "step": 3344 }, { "epoch": 0.49182135636831464, "grad_norm": 0.49233478307724, "learning_rate": 6.630630630630631e-05, "loss": 1.4111, "step": 3345 }, { "epoch": 0.49196838816394045, "grad_norm": 0.39789021015167236, "learning_rate": 6.626626626626627e-05, "loss": 1.1296, "step": 3346 }, { "epoch": 0.49211541995956626, "grad_norm": 0.6162694096565247, "learning_rate": 6.622622622622623e-05, "loss": 1.0133, "step": 3347 }, { "epoch": 0.49226245175519207, "grad_norm": 0.4749471843242645, "learning_rate": 6.61861861861862e-05, "loss": 1.4695, "step": 3348 }, { "epoch": 0.4924094835508179, "grad_norm": 0.6323646306991577, "learning_rate": 6.614614614614614e-05, "loss": 0.856, "step": 3349 }, { "epoch": 0.4925565153464437, "grad_norm": 0.5263227820396423, "learning_rate": 6.610610610610611e-05, "loss": 1.4992, "step": 3350 }, { "epoch": 0.4927035471420695, "grad_norm": 0.6260164380073547, "learning_rate": 6.606606606606607e-05, "loss": 0.9273, "step": 3351 }, { "epoch": 0.4928505789376953, "grad_norm": 0.5665484070777893, "learning_rate": 6.602602602602603e-05, "loss": 1.6253, "step": 3352 }, { "epoch": 0.4929976107333211, "grad_norm": 0.6353617906570435, "learning_rate": 6.598598598598599e-05, "loss": 1.2117, "step": 3353 }, { "epoch": 0.4931446425289469, "grad_norm": 0.43839597702026367, "learning_rate": 6.594594594594596e-05, "loss": 1.5586, "step": 3354 }, { "epoch": 0.4932916743245727, "grad_norm": 0.7278533577919006, "learning_rate": 6.590590590590591e-05, "loss": 1.147, "step": 3355 }, { "epoch": 0.49343870612019847, "grad_norm": 0.5341598987579346, "learning_rate": 6.586586586586587e-05, "loss": 1.0181, "step": 3356 }, { "epoch": 0.4935857379158243, "grad_norm": 0.4240419864654541, "learning_rate": 6.582582582582583e-05, "loss": 1.4163, "step": 3357 }, { "epoch": 0.4937327697114501, "grad_norm": 0.37597787380218506, "learning_rate": 6.578578578578579e-05, "loss": 1.5788, "step": 3358 }, { "epoch": 0.4938798015070759, "grad_norm": 0.5935999155044556, "learning_rate": 6.574574574574574e-05, "loss": 1.1984, "step": 3359 }, { "epoch": 0.4940268333027017, "grad_norm": 0.5849061012268066, "learning_rate": 6.570570570570571e-05, "loss": 1.4727, "step": 3360 }, { "epoch": 0.4941738650983275, "grad_norm": 0.4390101730823517, "learning_rate": 6.566566566566567e-05, "loss": 1.1105, "step": 3361 }, { "epoch": 0.4943208968939533, "grad_norm": 0.7853102684020996, "learning_rate": 6.562562562562563e-05, "loss": 1.0171, "step": 3362 }, { "epoch": 0.4944679286895791, "grad_norm": 0.5718558430671692, "learning_rate": 6.558558558558559e-05, "loss": 0.9464, "step": 3363 }, { "epoch": 0.4946149604852049, "grad_norm": 0.7989034056663513, "learning_rate": 6.554554554554556e-05, "loss": 0.9529, "step": 3364 }, { "epoch": 0.49476199228083073, "grad_norm": 0.5450612902641296, "learning_rate": 6.55055055055055e-05, "loss": 0.7431, "step": 3365 }, { "epoch": 0.49490902407645654, "grad_norm": 0.6750144958496094, "learning_rate": 6.546546546546547e-05, "loss": 1.0054, "step": 3366 }, { "epoch": 0.49505605587208235, "grad_norm": 0.6674582362174988, "learning_rate": 6.542542542542543e-05, "loss": 1.2071, "step": 3367 }, { "epoch": 0.49520308766770815, "grad_norm": 0.6827885508537292, "learning_rate": 6.538538538538539e-05, "loss": 1.0311, "step": 3368 }, { "epoch": 0.49535011946333396, "grad_norm": 0.5705289840698242, "learning_rate": 6.534534534534534e-05, "loss": 1.0985, "step": 3369 }, { "epoch": 0.49549715125895977, "grad_norm": 0.7413957715034485, "learning_rate": 6.530530530530531e-05, "loss": 0.9992, "step": 3370 }, { "epoch": 0.4956441830545856, "grad_norm": 0.5783135890960693, "learning_rate": 6.526526526526526e-05, "loss": 1.3526, "step": 3371 }, { "epoch": 0.4957912148502114, "grad_norm": 0.6032562255859375, "learning_rate": 6.522522522522523e-05, "loss": 1.2392, "step": 3372 }, { "epoch": 0.4959382466458372, "grad_norm": 0.6077689528465271, "learning_rate": 6.51851851851852e-05, "loss": 1.1085, "step": 3373 }, { "epoch": 0.49608527844146294, "grad_norm": 0.6343966126441956, "learning_rate": 6.514514514514514e-05, "loss": 0.9515, "step": 3374 }, { "epoch": 0.49623231023708875, "grad_norm": 0.8182551264762878, "learning_rate": 6.510510510510511e-05, "loss": 1.0315, "step": 3375 }, { "epoch": 0.49637934203271455, "grad_norm": 0.7553261518478394, "learning_rate": 6.506506506506507e-05, "loss": 1.2242, "step": 3376 }, { "epoch": 0.49652637382834036, "grad_norm": 0.4930129051208496, "learning_rate": 6.502502502502503e-05, "loss": 1.2492, "step": 3377 }, { "epoch": 0.49667340562396617, "grad_norm": 0.4160781800746918, "learning_rate": 6.498498498498499e-05, "loss": 1.1823, "step": 3378 }, { "epoch": 0.496820437419592, "grad_norm": 0.49956071376800537, "learning_rate": 6.494494494494496e-05, "loss": 1.9019, "step": 3379 }, { "epoch": 0.4969674692152178, "grad_norm": 0.687004804611206, "learning_rate": 6.49049049049049e-05, "loss": 1.2147, "step": 3380 }, { "epoch": 0.4971145010108436, "grad_norm": 0.5695807337760925, "learning_rate": 6.486486486486487e-05, "loss": 1.5618, "step": 3381 }, { "epoch": 0.4972615328064694, "grad_norm": 0.535466194152832, "learning_rate": 6.482482482482483e-05, "loss": 1.1857, "step": 3382 }, { "epoch": 0.4974085646020952, "grad_norm": 0.6981852054595947, "learning_rate": 6.478478478478479e-05, "loss": 1.2499, "step": 3383 }, { "epoch": 0.497555596397721, "grad_norm": 1.2474606037139893, "learning_rate": 6.474474474474474e-05, "loss": 1.2682, "step": 3384 }, { "epoch": 0.4977026281933468, "grad_norm": 0.47324779629707336, "learning_rate": 6.470470470470472e-05, "loss": 1.5791, "step": 3385 }, { "epoch": 0.4978496599889726, "grad_norm": 0.6258242130279541, "learning_rate": 6.466466466466466e-05, "loss": 0.9348, "step": 3386 }, { "epoch": 0.49799669178459843, "grad_norm": 0.36611059308052063, "learning_rate": 6.462462462462463e-05, "loss": 1.2376, "step": 3387 }, { "epoch": 0.49814372358022424, "grad_norm": 0.6017829179763794, "learning_rate": 6.458458458458459e-05, "loss": 0.9526, "step": 3388 }, { "epoch": 0.49829075537585005, "grad_norm": 0.6481937170028687, "learning_rate": 6.454454454454454e-05, "loss": 1.0949, "step": 3389 }, { "epoch": 0.49843778717147585, "grad_norm": 0.6051651239395142, "learning_rate": 6.45045045045045e-05, "loss": 1.1903, "step": 3390 }, { "epoch": 0.49858481896710166, "grad_norm": 0.5102601051330566, "learning_rate": 6.446446446446447e-05, "loss": 0.9783, "step": 3391 }, { "epoch": 0.4987318507627274, "grad_norm": 0.6168578267097473, "learning_rate": 6.442442442442442e-05, "loss": 1.2306, "step": 3392 }, { "epoch": 0.4988788825583532, "grad_norm": 0.6490815877914429, "learning_rate": 6.438438438438439e-05, "loss": 1.0171, "step": 3393 }, { "epoch": 0.499025914353979, "grad_norm": 0.7570835947990417, "learning_rate": 6.434434434434434e-05, "loss": 1.0066, "step": 3394 }, { "epoch": 0.49917294614960483, "grad_norm": 0.572658121585846, "learning_rate": 6.43043043043043e-05, "loss": 1.0774, "step": 3395 }, { "epoch": 0.49931997794523064, "grad_norm": 0.6210810542106628, "learning_rate": 6.426426426426426e-05, "loss": 1.2011, "step": 3396 }, { "epoch": 0.49946700974085645, "grad_norm": 0.633815586566925, "learning_rate": 6.422422422422423e-05, "loss": 0.9166, "step": 3397 }, { "epoch": 0.49961404153648226, "grad_norm": 0.7618087530136108, "learning_rate": 6.418418418418419e-05, "loss": 1.2597, "step": 3398 }, { "epoch": 0.49976107333210806, "grad_norm": 0.5643290281295776, "learning_rate": 6.414414414414415e-05, "loss": 0.9314, "step": 3399 }, { "epoch": 0.49990810512773387, "grad_norm": 0.7341490983963013, "learning_rate": 6.410410410410412e-05, "loss": 1.3148, "step": 3400 }, { "epoch": 0.5000551369233597, "grad_norm": 0.5781711339950562, "learning_rate": 6.406406406406406e-05, "loss": 1.0804, "step": 3401 }, { "epoch": 0.5002021687189855, "grad_norm": 0.5832050442695618, "learning_rate": 6.402402402402403e-05, "loss": 1.1449, "step": 3402 }, { "epoch": 0.5003492005146113, "grad_norm": 0.5869919657707214, "learning_rate": 6.398398398398399e-05, "loss": 1.0944, "step": 3403 }, { "epoch": 0.5004962323102371, "grad_norm": 0.7026706337928772, "learning_rate": 6.394394394394395e-05, "loss": 1.0286, "step": 3404 }, { "epoch": 0.5006432641058629, "grad_norm": 0.7896353006362915, "learning_rate": 6.39039039039039e-05, "loss": 0.7744, "step": 3405 }, { "epoch": 0.5007902959014887, "grad_norm": 0.5398435592651367, "learning_rate": 6.386386386386387e-05, "loss": 1.2056, "step": 3406 }, { "epoch": 0.5009373276971145, "grad_norm": 0.6787285208702087, "learning_rate": 6.382382382382383e-05, "loss": 0.9494, "step": 3407 }, { "epoch": 0.5010843594927403, "grad_norm": 0.5421918630599976, "learning_rate": 6.378378378378379e-05, "loss": 0.9499, "step": 3408 }, { "epoch": 0.5012313912883661, "grad_norm": 0.7666171789169312, "learning_rate": 6.374374374374375e-05, "loss": 1.1124, "step": 3409 }, { "epoch": 0.5013784230839919, "grad_norm": 0.6074342131614685, "learning_rate": 6.37037037037037e-05, "loss": 1.4996, "step": 3410 }, { "epoch": 0.5015254548796177, "grad_norm": 0.6814637780189514, "learning_rate": 6.366366366366366e-05, "loss": 1.4287, "step": 3411 }, { "epoch": 0.5016724866752436, "grad_norm": 0.5478597283363342, "learning_rate": 6.362362362362363e-05, "loss": 1.191, "step": 3412 }, { "epoch": 0.5018195184708694, "grad_norm": 0.5155368447303772, "learning_rate": 6.358358358358359e-05, "loss": 1.2587, "step": 3413 }, { "epoch": 0.5019665502664952, "grad_norm": 0.5695831775665283, "learning_rate": 6.354354354354355e-05, "loss": 1.2011, "step": 3414 }, { "epoch": 0.502113582062121, "grad_norm": 0.5847434401512146, "learning_rate": 6.35035035035035e-05, "loss": 1.3228, "step": 3415 }, { "epoch": 0.5022606138577468, "grad_norm": 0.6898792386054993, "learning_rate": 6.346346346346347e-05, "loss": 0.9748, "step": 3416 }, { "epoch": 0.5024076456533726, "grad_norm": 0.7162180542945862, "learning_rate": 6.342342342342342e-05, "loss": 0.8771, "step": 3417 }, { "epoch": 0.5025546774489984, "grad_norm": 0.5687199831008911, "learning_rate": 6.338338338338339e-05, "loss": 1.0654, "step": 3418 }, { "epoch": 0.5027017092446241, "grad_norm": 0.8528727889060974, "learning_rate": 6.334334334334335e-05, "loss": 1.1279, "step": 3419 }, { "epoch": 0.5028487410402499, "grad_norm": 0.771495521068573, "learning_rate": 6.33033033033033e-05, "loss": 1.0325, "step": 3420 }, { "epoch": 0.5029957728358757, "grad_norm": 0.78303462266922, "learning_rate": 6.326326326326326e-05, "loss": 1.2582, "step": 3421 }, { "epoch": 0.5031428046315015, "grad_norm": 0.5247710347175598, "learning_rate": 6.322322322322323e-05, "loss": 1.3341, "step": 3422 }, { "epoch": 0.5032898364271273, "grad_norm": 0.5227440595626831, "learning_rate": 6.318318318318318e-05, "loss": 1.3438, "step": 3423 }, { "epoch": 0.5034368682227531, "grad_norm": 0.6320356130599976, "learning_rate": 6.314314314314315e-05, "loss": 0.9949, "step": 3424 }, { "epoch": 0.5035839000183789, "grad_norm": 0.4500722587108612, "learning_rate": 6.31031031031031e-05, "loss": 1.0176, "step": 3425 }, { "epoch": 0.5037309318140047, "grad_norm": 0.7153458595275879, "learning_rate": 6.306306306306306e-05, "loss": 1.1382, "step": 3426 }, { "epoch": 0.5038779636096306, "grad_norm": 0.4422489404678345, "learning_rate": 6.302302302302303e-05, "loss": 1.5958, "step": 3427 }, { "epoch": 0.5040249954052564, "grad_norm": 0.5556509494781494, "learning_rate": 6.298298298298299e-05, "loss": 1.0211, "step": 3428 }, { "epoch": 0.5041720272008822, "grad_norm": 0.809954047203064, "learning_rate": 6.294294294294295e-05, "loss": 0.7582, "step": 3429 }, { "epoch": 0.504319058996508, "grad_norm": 0.525018572807312, "learning_rate": 6.29029029029029e-05, "loss": 1.5247, "step": 3430 }, { "epoch": 0.5044660907921338, "grad_norm": 0.5444248914718628, "learning_rate": 6.286286286286288e-05, "loss": 1.482, "step": 3431 }, { "epoch": 0.5046131225877596, "grad_norm": 0.6122838258743286, "learning_rate": 6.282282282282282e-05, "loss": 1.1739, "step": 3432 }, { "epoch": 0.5047601543833854, "grad_norm": 0.6989633440971375, "learning_rate": 6.278278278278279e-05, "loss": 1.0407, "step": 3433 }, { "epoch": 0.5049071861790112, "grad_norm": 0.6445006728172302, "learning_rate": 6.274274274274275e-05, "loss": 1.3182, "step": 3434 }, { "epoch": 0.505054217974637, "grad_norm": 0.5189712643623352, "learning_rate": 6.27027027027027e-05, "loss": 1.0868, "step": 3435 }, { "epoch": 0.5052012497702628, "grad_norm": 0.6035285592079163, "learning_rate": 6.266266266266266e-05, "loss": 1.5966, "step": 3436 }, { "epoch": 0.5053482815658886, "grad_norm": 0.44259142875671387, "learning_rate": 6.262262262262263e-05, "loss": 1.7302, "step": 3437 }, { "epoch": 0.5054953133615144, "grad_norm": 0.5779199600219727, "learning_rate": 6.258258258258258e-05, "loss": 1.1472, "step": 3438 }, { "epoch": 0.5056423451571402, "grad_norm": 0.6569118499755859, "learning_rate": 6.254254254254255e-05, "loss": 0.9238, "step": 3439 }, { "epoch": 0.505789376952766, "grad_norm": 0.651832640171051, "learning_rate": 6.25025025025025e-05, "loss": 1.2318, "step": 3440 }, { "epoch": 0.5059364087483919, "grad_norm": 0.8026716113090515, "learning_rate": 6.246246246246246e-05, "loss": 0.9163, "step": 3441 }, { "epoch": 0.5060834405440177, "grad_norm": 0.6671211123466492, "learning_rate": 6.242242242242242e-05, "loss": 1.4128, "step": 3442 }, { "epoch": 0.5062304723396435, "grad_norm": 0.6032424569129944, "learning_rate": 6.238238238238239e-05, "loss": 0.9711, "step": 3443 }, { "epoch": 0.5063775041352693, "grad_norm": 0.4746956527233124, "learning_rate": 6.234234234234235e-05, "loss": 1.3055, "step": 3444 }, { "epoch": 0.5065245359308951, "grad_norm": 0.5585423111915588, "learning_rate": 6.23023023023023e-05, "loss": 1.4281, "step": 3445 }, { "epoch": 0.5066715677265209, "grad_norm": 0.663744330406189, "learning_rate": 6.226226226226226e-05, "loss": 1.2271, "step": 3446 }, { "epoch": 0.5068185995221467, "grad_norm": 0.4810066223144531, "learning_rate": 6.222222222222222e-05, "loss": 1.5876, "step": 3447 }, { "epoch": 0.5069656313177725, "grad_norm": 0.5046162605285645, "learning_rate": 6.218218218218218e-05, "loss": 1.1186, "step": 3448 }, { "epoch": 0.5071126631133983, "grad_norm": 0.6968781352043152, "learning_rate": 6.214214214214215e-05, "loss": 0.981, "step": 3449 }, { "epoch": 0.5072596949090241, "grad_norm": 0.44241976737976074, "learning_rate": 6.21021021021021e-05, "loss": 1.7512, "step": 3450 }, { "epoch": 0.5074067267046499, "grad_norm": 0.4946562945842743, "learning_rate": 6.206206206206206e-05, "loss": 1.4074, "step": 3451 }, { "epoch": 0.5075537585002757, "grad_norm": 0.57932448387146, "learning_rate": 6.202202202202203e-05, "loss": 1.0189, "step": 3452 }, { "epoch": 0.5077007902959015, "grad_norm": 0.7589049339294434, "learning_rate": 6.198198198198199e-05, "loss": 1.1152, "step": 3453 }, { "epoch": 0.5078478220915273, "grad_norm": 0.6586914658546448, "learning_rate": 6.194194194194195e-05, "loss": 0.9776, "step": 3454 }, { "epoch": 0.507994853887153, "grad_norm": 0.6272644996643066, "learning_rate": 6.19019019019019e-05, "loss": 1.6991, "step": 3455 }, { "epoch": 0.5081418856827788, "grad_norm": 0.5045684576034546, "learning_rate": 6.186186186186186e-05, "loss": 1.2731, "step": 3456 }, { "epoch": 0.5082889174784047, "grad_norm": 0.4400702118873596, "learning_rate": 6.182182182182182e-05, "loss": 1.7658, "step": 3457 }, { "epoch": 0.5084359492740305, "grad_norm": 0.424439013004303, "learning_rate": 6.178178178178179e-05, "loss": 1.1943, "step": 3458 }, { "epoch": 0.5085829810696563, "grad_norm": 0.7014318704605103, "learning_rate": 6.174174174174175e-05, "loss": 1.0408, "step": 3459 }, { "epoch": 0.5087300128652821, "grad_norm": 0.6236255764961243, "learning_rate": 6.17017017017017e-05, "loss": 1.2347, "step": 3460 }, { "epoch": 0.5088770446609079, "grad_norm": 0.4428516924381256, "learning_rate": 6.166166166166166e-05, "loss": 1.5243, "step": 3461 }, { "epoch": 0.5090240764565337, "grad_norm": 0.6391969919204712, "learning_rate": 6.162162162162163e-05, "loss": 0.8168, "step": 3462 }, { "epoch": 0.5091711082521595, "grad_norm": 0.6165295839309692, "learning_rate": 6.158158158158158e-05, "loss": 1.5404, "step": 3463 }, { "epoch": 0.5093181400477853, "grad_norm": 0.4388118386268616, "learning_rate": 6.154154154154155e-05, "loss": 1.6471, "step": 3464 }, { "epoch": 0.5094651718434111, "grad_norm": 0.8111442923545837, "learning_rate": 6.15015015015015e-05, "loss": 0.8949, "step": 3465 }, { "epoch": 0.5096122036390369, "grad_norm": 0.5232940316200256, "learning_rate": 6.146146146146146e-05, "loss": 1.0543, "step": 3466 }, { "epoch": 0.5097592354346627, "grad_norm": 0.5940284132957458, "learning_rate": 6.142142142142142e-05, "loss": 1.0599, "step": 3467 }, { "epoch": 0.5099062672302885, "grad_norm": 0.6476051807403564, "learning_rate": 6.138138138138139e-05, "loss": 0.9557, "step": 3468 }, { "epoch": 0.5100532990259143, "grad_norm": 0.7386900782585144, "learning_rate": 6.134134134134134e-05, "loss": 0.8362, "step": 3469 }, { "epoch": 0.5102003308215401, "grad_norm": 0.7591505646705627, "learning_rate": 6.13013013013013e-05, "loss": 1.3074, "step": 3470 }, { "epoch": 0.510347362617166, "grad_norm": 0.5274098515510559, "learning_rate": 6.126126126126126e-05, "loss": 1.2545, "step": 3471 }, { "epoch": 0.5104943944127918, "grad_norm": 0.4515654146671295, "learning_rate": 6.122122122122122e-05, "loss": 1.5054, "step": 3472 }, { "epoch": 0.5106414262084176, "grad_norm": 0.8166767954826355, "learning_rate": 6.118118118118118e-05, "loss": 1.04, "step": 3473 }, { "epoch": 0.5107884580040434, "grad_norm": 0.6336221694946289, "learning_rate": 6.114114114114115e-05, "loss": 0.8702, "step": 3474 }, { "epoch": 0.5109354897996692, "grad_norm": 0.5316667556762695, "learning_rate": 6.110110110110109e-05, "loss": 1.2413, "step": 3475 }, { "epoch": 0.511082521595295, "grad_norm": 0.9452571868896484, "learning_rate": 6.106106106106106e-05, "loss": 1.212, "step": 3476 }, { "epoch": 0.5112295533909208, "grad_norm": 0.6385164856910706, "learning_rate": 6.102102102102102e-05, "loss": 1.2356, "step": 3477 }, { "epoch": 0.5113765851865466, "grad_norm": 0.6466658115386963, "learning_rate": 6.0980980980980986e-05, "loss": 1.1388, "step": 3478 }, { "epoch": 0.5115236169821724, "grad_norm": 0.8952450752258301, "learning_rate": 6.094094094094095e-05, "loss": 1.0629, "step": 3479 }, { "epoch": 0.5116706487777982, "grad_norm": 0.7048832774162292, "learning_rate": 6.09009009009009e-05, "loss": 0.9007, "step": 3480 }, { "epoch": 0.511817680573424, "grad_norm": 0.4492886960506439, "learning_rate": 6.086086086086087e-05, "loss": 1.1377, "step": 3481 }, { "epoch": 0.5119647123690498, "grad_norm": 0.5716675519943237, "learning_rate": 6.082082082082082e-05, "loss": 0.8201, "step": 3482 }, { "epoch": 0.5121117441646756, "grad_norm": 0.7168245911598206, "learning_rate": 6.0780780780780786e-05, "loss": 1.1832, "step": 3483 }, { "epoch": 0.5122587759603014, "grad_norm": 0.7189253568649292, "learning_rate": 6.074074074074074e-05, "loss": 1.3019, "step": 3484 }, { "epoch": 0.5124058077559273, "grad_norm": 0.45033830404281616, "learning_rate": 6.070070070070071e-05, "loss": 1.1535, "step": 3485 }, { "epoch": 0.5125528395515531, "grad_norm": 0.46997418999671936, "learning_rate": 6.0660660660660665e-05, "loss": 1.1928, "step": 3486 }, { "epoch": 0.5126998713471789, "grad_norm": 0.5180462598800659, "learning_rate": 6.062062062062063e-05, "loss": 1.1418, "step": 3487 }, { "epoch": 0.5128469031428047, "grad_norm": 0.6874381899833679, "learning_rate": 6.058058058058058e-05, "loss": 1.0874, "step": 3488 }, { "epoch": 0.5129939349384305, "grad_norm": 0.8734959959983826, "learning_rate": 6.0540540540540543e-05, "loss": 0.9027, "step": 3489 }, { "epoch": 0.5131409667340563, "grad_norm": 0.7105079293251038, "learning_rate": 6.05005005005005e-05, "loss": 1.0851, "step": 3490 }, { "epoch": 0.5132879985296821, "grad_norm": 0.645311713218689, "learning_rate": 6.0460460460460465e-05, "loss": 1.2493, "step": 3491 }, { "epoch": 0.5134350303253078, "grad_norm": 0.6097210645675659, "learning_rate": 6.042042042042042e-05, "loss": 1.3197, "step": 3492 }, { "epoch": 0.5135820621209336, "grad_norm": 0.6779841184616089, "learning_rate": 6.0380380380380386e-05, "loss": 1.2786, "step": 3493 }, { "epoch": 0.5137290939165594, "grad_norm": 0.7479700446128845, "learning_rate": 6.034034034034034e-05, "loss": 0.945, "step": 3494 }, { "epoch": 0.5138761257121852, "grad_norm": 0.6203379034996033, "learning_rate": 6.030030030030031e-05, "loss": 1.3657, "step": 3495 }, { "epoch": 0.514023157507811, "grad_norm": 0.5273022651672363, "learning_rate": 6.026026026026026e-05, "loss": 1.0671, "step": 3496 }, { "epoch": 0.5141701893034368, "grad_norm": 0.45786240696907043, "learning_rate": 6.022022022022022e-05, "loss": 1.2436, "step": 3497 }, { "epoch": 0.5143172210990626, "grad_norm": 0.6037924289703369, "learning_rate": 6.018018018018018e-05, "loss": 1.0851, "step": 3498 }, { "epoch": 0.5144642528946884, "grad_norm": 0.5505682826042175, "learning_rate": 6.0140140140140144e-05, "loss": 0.9992, "step": 3499 }, { "epoch": 0.5146112846903143, "grad_norm": 0.8241732120513916, "learning_rate": 6.01001001001001e-05, "loss": 1.0626, "step": 3500 }, { "epoch": 0.5147583164859401, "grad_norm": 0.5072727799415588, "learning_rate": 6.0060060060060066e-05, "loss": 1.4246, "step": 3501 }, { "epoch": 0.5149053482815659, "grad_norm": 0.6405442357063293, "learning_rate": 6.0020020020020016e-05, "loss": 1.2556, "step": 3502 }, { "epoch": 0.5150523800771917, "grad_norm": 0.47144556045532227, "learning_rate": 5.997997997997998e-05, "loss": 1.0594, "step": 3503 }, { "epoch": 0.5151994118728175, "grad_norm": 0.6248560547828674, "learning_rate": 5.993993993993994e-05, "loss": 1.2585, "step": 3504 }, { "epoch": 0.5153464436684433, "grad_norm": 0.5109277963638306, "learning_rate": 5.98998998998999e-05, "loss": 1.3411, "step": 3505 }, { "epoch": 0.5154934754640691, "grad_norm": 0.7368922829627991, "learning_rate": 5.9859859859859866e-05, "loss": 1.1539, "step": 3506 }, { "epoch": 0.5156405072596949, "grad_norm": 0.6708735823631287, "learning_rate": 5.981981981981982e-05, "loss": 1.1611, "step": 3507 }, { "epoch": 0.5157875390553207, "grad_norm": 0.4145963490009308, "learning_rate": 5.977977977977979e-05, "loss": 1.6129, "step": 3508 }, { "epoch": 0.5159345708509465, "grad_norm": 0.8877409100532532, "learning_rate": 5.9739739739739745e-05, "loss": 0.9593, "step": 3509 }, { "epoch": 0.5160816026465723, "grad_norm": 0.8116965889930725, "learning_rate": 5.969969969969971e-05, "loss": 1.1481, "step": 3510 }, { "epoch": 0.5162286344421981, "grad_norm": 0.46372586488723755, "learning_rate": 5.965965965965966e-05, "loss": 1.5636, "step": 3511 }, { "epoch": 0.5163756662378239, "grad_norm": 0.7037585377693176, "learning_rate": 5.9619619619619623e-05, "loss": 1.1471, "step": 3512 }, { "epoch": 0.5165226980334497, "grad_norm": 0.6326743960380554, "learning_rate": 5.957957957957958e-05, "loss": 1.122, "step": 3513 }, { "epoch": 0.5166697298290756, "grad_norm": 0.5878100991249084, "learning_rate": 5.9539539539539545e-05, "loss": 1.2329, "step": 3514 }, { "epoch": 0.5168167616247014, "grad_norm": 0.44072312116622925, "learning_rate": 5.94994994994995e-05, "loss": 1.1764, "step": 3515 }, { "epoch": 0.5169637934203272, "grad_norm": 0.6039829254150391, "learning_rate": 5.9459459459459466e-05, "loss": 1.2007, "step": 3516 }, { "epoch": 0.517110825215953, "grad_norm": 0.5231195092201233, "learning_rate": 5.941941941941942e-05, "loss": 1.2344, "step": 3517 }, { "epoch": 0.5172578570115788, "grad_norm": 0.6178322434425354, "learning_rate": 5.937937937937939e-05, "loss": 1.2681, "step": 3518 }, { "epoch": 0.5174048888072046, "grad_norm": 0.49223384261131287, "learning_rate": 5.933933933933934e-05, "loss": 1.0767, "step": 3519 }, { "epoch": 0.5175519206028304, "grad_norm": 0.5918794870376587, "learning_rate": 5.92992992992993e-05, "loss": 1.4411, "step": 3520 }, { "epoch": 0.5176989523984562, "grad_norm": 0.5065402388572693, "learning_rate": 5.925925925925926e-05, "loss": 1.1782, "step": 3521 }, { "epoch": 0.517845984194082, "grad_norm": 0.500900149345398, "learning_rate": 5.9219219219219224e-05, "loss": 1.1586, "step": 3522 }, { "epoch": 0.5179930159897078, "grad_norm": 0.42036041617393494, "learning_rate": 5.917917917917918e-05, "loss": 1.5843, "step": 3523 }, { "epoch": 0.5181400477853336, "grad_norm": 0.6629079580307007, "learning_rate": 5.9139139139139145e-05, "loss": 1.1919, "step": 3524 }, { "epoch": 0.5182870795809594, "grad_norm": 0.5942605137825012, "learning_rate": 5.9099099099099096e-05, "loss": 1.0043, "step": 3525 }, { "epoch": 0.5184341113765852, "grad_norm": 0.5495783090591431, "learning_rate": 5.905905905905906e-05, "loss": 1.3566, "step": 3526 }, { "epoch": 0.518581143172211, "grad_norm": 0.5375892519950867, "learning_rate": 5.901901901901902e-05, "loss": 1.096, "step": 3527 }, { "epoch": 0.5187281749678367, "grad_norm": 0.7232728600502014, "learning_rate": 5.897897897897898e-05, "loss": 0.8749, "step": 3528 }, { "epoch": 0.5188752067634625, "grad_norm": 0.4499412775039673, "learning_rate": 5.893893893893894e-05, "loss": 1.56, "step": 3529 }, { "epoch": 0.5190222385590884, "grad_norm": 0.4536270797252655, "learning_rate": 5.88988988988989e-05, "loss": 1.2796, "step": 3530 }, { "epoch": 0.5191692703547142, "grad_norm": 0.5747660994529724, "learning_rate": 5.8858858858858854e-05, "loss": 1.3089, "step": 3531 }, { "epoch": 0.51931630215034, "grad_norm": 0.7154669165611267, "learning_rate": 5.8818818818818825e-05, "loss": 1.2662, "step": 3532 }, { "epoch": 0.5194633339459658, "grad_norm": 0.6344802379608154, "learning_rate": 5.877877877877879e-05, "loss": 0.9115, "step": 3533 }, { "epoch": 0.5196103657415916, "grad_norm": 0.6202223300933838, "learning_rate": 5.873873873873874e-05, "loss": 0.9948, "step": 3534 }, { "epoch": 0.5197573975372174, "grad_norm": 0.5599839091300964, "learning_rate": 5.86986986986987e-05, "loss": 1.0203, "step": 3535 }, { "epoch": 0.5199044293328432, "grad_norm": 0.6666040420532227, "learning_rate": 5.865865865865866e-05, "loss": 1.1914, "step": 3536 }, { "epoch": 0.520051461128469, "grad_norm": 0.4874829351902008, "learning_rate": 5.8618618618618625e-05, "loss": 1.2889, "step": 3537 }, { "epoch": 0.5201984929240948, "grad_norm": 0.6781805753707886, "learning_rate": 5.857857857857858e-05, "loss": 0.9069, "step": 3538 }, { "epoch": 0.5203455247197206, "grad_norm": 0.5251201391220093, "learning_rate": 5.8538538538538546e-05, "loss": 1.1961, "step": 3539 }, { "epoch": 0.5204925565153464, "grad_norm": 0.6371070742607117, "learning_rate": 5.84984984984985e-05, "loss": 1.0523, "step": 3540 }, { "epoch": 0.5206395883109722, "grad_norm": 0.6118427515029907, "learning_rate": 5.845845845845847e-05, "loss": 0.9605, "step": 3541 }, { "epoch": 0.520786620106598, "grad_norm": 0.43155187368392944, "learning_rate": 5.841841841841842e-05, "loss": 1.1146, "step": 3542 }, { "epoch": 0.5209336519022238, "grad_norm": 0.7759458422660828, "learning_rate": 5.837837837837838e-05, "loss": 1.0, "step": 3543 }, { "epoch": 0.5210806836978497, "grad_norm": 0.6543759107589722, "learning_rate": 5.833833833833834e-05, "loss": 1.0516, "step": 3544 }, { "epoch": 0.5212277154934755, "grad_norm": 0.6853777766227722, "learning_rate": 5.8298298298298304e-05, "loss": 1.2288, "step": 3545 }, { "epoch": 0.5213747472891013, "grad_norm": 0.7425467371940613, "learning_rate": 5.825825825825826e-05, "loss": 1.0063, "step": 3546 }, { "epoch": 0.5215217790847271, "grad_norm": 0.5876864790916443, "learning_rate": 5.8218218218218225e-05, "loss": 1.1412, "step": 3547 }, { "epoch": 0.5216688108803529, "grad_norm": 0.9265879392623901, "learning_rate": 5.8178178178178176e-05, "loss": 1.1729, "step": 3548 }, { "epoch": 0.5218158426759787, "grad_norm": 0.5665740966796875, "learning_rate": 5.813813813813814e-05, "loss": 1.1654, "step": 3549 }, { "epoch": 0.5219628744716045, "grad_norm": 0.4800000488758087, "learning_rate": 5.80980980980981e-05, "loss": 1.1631, "step": 3550 }, { "epoch": 0.5221099062672303, "grad_norm": 0.6386014223098755, "learning_rate": 5.805805805805806e-05, "loss": 1.2915, "step": 3551 }, { "epoch": 0.5222569380628561, "grad_norm": 0.9018503427505493, "learning_rate": 5.801801801801802e-05, "loss": 1.1597, "step": 3552 }, { "epoch": 0.5224039698584819, "grad_norm": 0.5579634308815002, "learning_rate": 5.797797797797798e-05, "loss": 0.9787, "step": 3553 }, { "epoch": 0.5225510016541077, "grad_norm": 0.4816688895225525, "learning_rate": 5.7937937937937934e-05, "loss": 1.6892, "step": 3554 }, { "epoch": 0.5226980334497335, "grad_norm": 0.5390140414237976, "learning_rate": 5.7897897897897904e-05, "loss": 1.3818, "step": 3555 }, { "epoch": 0.5228450652453593, "grad_norm": 0.5151313543319702, "learning_rate": 5.7857857857857855e-05, "loss": 0.9841, "step": 3556 }, { "epoch": 0.5229920970409851, "grad_norm": 0.643791675567627, "learning_rate": 5.781781781781782e-05, "loss": 1.3553, "step": 3557 }, { "epoch": 0.523139128836611, "grad_norm": 0.45427975058555603, "learning_rate": 5.7777777777777776e-05, "loss": 2.2036, "step": 3558 }, { "epoch": 0.5232861606322368, "grad_norm": 0.5086284875869751, "learning_rate": 5.773773773773774e-05, "loss": 1.208, "step": 3559 }, { "epoch": 0.5234331924278626, "grad_norm": 0.591441810131073, "learning_rate": 5.7697697697697705e-05, "loss": 1.1693, "step": 3560 }, { "epoch": 0.5235802242234884, "grad_norm": 0.4512271285057068, "learning_rate": 5.765765765765766e-05, "loss": 1.2596, "step": 3561 }, { "epoch": 0.5237272560191142, "grad_norm": 0.5502907037734985, "learning_rate": 5.7617617617617626e-05, "loss": 0.972, "step": 3562 }, { "epoch": 0.52387428781474, "grad_norm": 0.46586015820503235, "learning_rate": 5.757757757757758e-05, "loss": 1.2029, "step": 3563 }, { "epoch": 0.5240213196103657, "grad_norm": 0.6084601283073425, "learning_rate": 5.753753753753755e-05, "loss": 1.0932, "step": 3564 }, { "epoch": 0.5241683514059915, "grad_norm": 0.5577886700630188, "learning_rate": 5.74974974974975e-05, "loss": 1.1442, "step": 3565 }, { "epoch": 0.5243153832016173, "grad_norm": 0.43561816215515137, "learning_rate": 5.745745745745746e-05, "loss": 1.5653, "step": 3566 }, { "epoch": 0.5244624149972431, "grad_norm": 0.3822243809700012, "learning_rate": 5.741741741741742e-05, "loss": 1.6298, "step": 3567 }, { "epoch": 0.5246094467928689, "grad_norm": 0.629839301109314, "learning_rate": 5.7377377377377384e-05, "loss": 1.0085, "step": 3568 }, { "epoch": 0.5247564785884947, "grad_norm": 0.505456268787384, "learning_rate": 5.733733733733734e-05, "loss": 1.2841, "step": 3569 }, { "epoch": 0.5249035103841205, "grad_norm": 0.541828453540802, "learning_rate": 5.7297297297297305e-05, "loss": 1.2553, "step": 3570 }, { "epoch": 0.5250505421797463, "grad_norm": 0.8095776438713074, "learning_rate": 5.7257257257257256e-05, "loss": 0.8604, "step": 3571 }, { "epoch": 0.5251975739753721, "grad_norm": 0.6028627753257751, "learning_rate": 5.721721721721722e-05, "loss": 1.0188, "step": 3572 }, { "epoch": 0.525344605770998, "grad_norm": 0.4369587004184723, "learning_rate": 5.717717717717718e-05, "loss": 1.2773, "step": 3573 }, { "epoch": 0.5254916375666238, "grad_norm": 0.429084837436676, "learning_rate": 5.713713713713714e-05, "loss": 1.4486, "step": 3574 }, { "epoch": 0.5256386693622496, "grad_norm": 0.6921045184135437, "learning_rate": 5.70970970970971e-05, "loss": 1.3826, "step": 3575 }, { "epoch": 0.5257857011578754, "grad_norm": 0.5425163507461548, "learning_rate": 5.705705705705706e-05, "loss": 1.1994, "step": 3576 }, { "epoch": 0.5259327329535012, "grad_norm": 0.639798104763031, "learning_rate": 5.7017017017017013e-05, "loss": 1.0887, "step": 3577 }, { "epoch": 0.526079764749127, "grad_norm": 0.4380558133125305, "learning_rate": 5.6976976976976984e-05, "loss": 0.9378, "step": 3578 }, { "epoch": 0.5262267965447528, "grad_norm": 0.5351855158805847, "learning_rate": 5.6936936936936935e-05, "loss": 1.2184, "step": 3579 }, { "epoch": 0.5263738283403786, "grad_norm": 0.7447459697723389, "learning_rate": 5.68968968968969e-05, "loss": 1.0685, "step": 3580 }, { "epoch": 0.5265208601360044, "grad_norm": 0.6774980425834656, "learning_rate": 5.6856856856856856e-05, "loss": 1.3049, "step": 3581 }, { "epoch": 0.5266678919316302, "grad_norm": 0.5380317568778992, "learning_rate": 5.681681681681682e-05, "loss": 1.4254, "step": 3582 }, { "epoch": 0.526814923727256, "grad_norm": 0.5080539584159851, "learning_rate": 5.677677677677677e-05, "loss": 1.1202, "step": 3583 }, { "epoch": 0.5269619555228818, "grad_norm": 0.7344757318496704, "learning_rate": 5.673673673673674e-05, "loss": 1.4981, "step": 3584 }, { "epoch": 0.5271089873185076, "grad_norm": 0.7988519668579102, "learning_rate": 5.669669669669669e-05, "loss": 0.9378, "step": 3585 }, { "epoch": 0.5272560191141334, "grad_norm": 0.7110586762428284, "learning_rate": 5.665665665665666e-05, "loss": 1.5849, "step": 3586 }, { "epoch": 0.5274030509097593, "grad_norm": 0.6073824763298035, "learning_rate": 5.661661661661663e-05, "loss": 1.3607, "step": 3587 }, { "epoch": 0.5275500827053851, "grad_norm": 0.5889189839363098, "learning_rate": 5.657657657657658e-05, "loss": 1.3108, "step": 3588 }, { "epoch": 0.5276971145010109, "grad_norm": 0.5417584180831909, "learning_rate": 5.653653653653654e-05, "loss": 1.2627, "step": 3589 }, { "epoch": 0.5278441462966367, "grad_norm": 0.4636939465999603, "learning_rate": 5.64964964964965e-05, "loss": 1.3509, "step": 3590 }, { "epoch": 0.5279911780922625, "grad_norm": 0.7365936040878296, "learning_rate": 5.6456456456456464e-05, "loss": 0.866, "step": 3591 }, { "epoch": 0.5281382098878883, "grad_norm": 0.723586916923523, "learning_rate": 5.641641641641642e-05, "loss": 1.2226, "step": 3592 }, { "epoch": 0.5282852416835141, "grad_norm": 0.5160314440727234, "learning_rate": 5.6376376376376385e-05, "loss": 1.5413, "step": 3593 }, { "epoch": 0.5284322734791399, "grad_norm": 0.48671624064445496, "learning_rate": 5.6336336336336336e-05, "loss": 1.5519, "step": 3594 }, { "epoch": 0.5285793052747657, "grad_norm": 0.7885681986808777, "learning_rate": 5.62962962962963e-05, "loss": 1.0984, "step": 3595 }, { "epoch": 0.5287263370703915, "grad_norm": 0.7653728723526001, "learning_rate": 5.625625625625626e-05, "loss": 0.9679, "step": 3596 }, { "epoch": 0.5288733688660173, "grad_norm": 0.48940494656562805, "learning_rate": 5.621621621621622e-05, "loss": 1.2745, "step": 3597 }, { "epoch": 0.5290204006616431, "grad_norm": 0.6457929611206055, "learning_rate": 5.617617617617618e-05, "loss": 1.1751, "step": 3598 }, { "epoch": 0.5291674324572689, "grad_norm": 0.4328003525733948, "learning_rate": 5.613613613613614e-05, "loss": 1.707, "step": 3599 }, { "epoch": 0.5293144642528946, "grad_norm": 0.6913061738014221, "learning_rate": 5.6096096096096093e-05, "loss": 1.0122, "step": 3600 }, { "epoch": 0.5294614960485204, "grad_norm": 0.5222436785697937, "learning_rate": 5.6056056056056064e-05, "loss": 1.0608, "step": 3601 }, { "epoch": 0.5296085278441462, "grad_norm": 0.5995531678199768, "learning_rate": 5.6016016016016015e-05, "loss": 1.0448, "step": 3602 }, { "epoch": 0.5297555596397721, "grad_norm": 0.597984254360199, "learning_rate": 5.597597597597598e-05, "loss": 1.0931, "step": 3603 }, { "epoch": 0.5299025914353979, "grad_norm": 0.602614164352417, "learning_rate": 5.5935935935935936e-05, "loss": 0.8028, "step": 3604 }, { "epoch": 0.5300496232310237, "grad_norm": 0.6794304251670837, "learning_rate": 5.58958958958959e-05, "loss": 1.2161, "step": 3605 }, { "epoch": 0.5301966550266495, "grad_norm": 0.5121974945068359, "learning_rate": 5.585585585585585e-05, "loss": 1.2959, "step": 3606 }, { "epoch": 0.5303436868222753, "grad_norm": 0.6621078252792358, "learning_rate": 5.581581581581582e-05, "loss": 0.9394, "step": 3607 }, { "epoch": 0.5304907186179011, "grad_norm": 0.7045146226882935, "learning_rate": 5.577577577577577e-05, "loss": 1.2686, "step": 3608 }, { "epoch": 0.5306377504135269, "grad_norm": 0.6521766781806946, "learning_rate": 5.573573573573574e-05, "loss": 0.82, "step": 3609 }, { "epoch": 0.5307847822091527, "grad_norm": 0.6223867535591125, "learning_rate": 5.5695695695695694e-05, "loss": 1.0356, "step": 3610 }, { "epoch": 0.5309318140047785, "grad_norm": 0.6243739724159241, "learning_rate": 5.565565565565566e-05, "loss": 1.0185, "step": 3611 }, { "epoch": 0.5310788458004043, "grad_norm": 0.535775899887085, "learning_rate": 5.5615615615615615e-05, "loss": 1.2088, "step": 3612 }, { "epoch": 0.5312258775960301, "grad_norm": 0.6416396498680115, "learning_rate": 5.557557557557558e-05, "loss": 1.0618, "step": 3613 }, { "epoch": 0.5313729093916559, "grad_norm": 0.6591413617134094, "learning_rate": 5.5535535535535544e-05, "loss": 1.0923, "step": 3614 }, { "epoch": 0.5315199411872817, "grad_norm": 0.8087542057037354, "learning_rate": 5.5495495495495494e-05, "loss": 1.2397, "step": 3615 }, { "epoch": 0.5316669729829075, "grad_norm": 0.5614745616912842, "learning_rate": 5.5455455455455465e-05, "loss": 1.0337, "step": 3616 }, { "epoch": 0.5318140047785334, "grad_norm": 0.5401776432991028, "learning_rate": 5.5415415415415416e-05, "loss": 0.6157, "step": 3617 }, { "epoch": 0.5319610365741592, "grad_norm": 0.5105632543563843, "learning_rate": 5.537537537537538e-05, "loss": 1.3715, "step": 3618 }, { "epoch": 0.532108068369785, "grad_norm": 0.8023272156715393, "learning_rate": 5.533533533533534e-05, "loss": 0.9274, "step": 3619 }, { "epoch": 0.5322551001654108, "grad_norm": 0.5974544882774353, "learning_rate": 5.52952952952953e-05, "loss": 2.4546, "step": 3620 }, { "epoch": 0.5324021319610366, "grad_norm": 0.5346347093582153, "learning_rate": 5.525525525525526e-05, "loss": 1.2374, "step": 3621 }, { "epoch": 0.5325491637566624, "grad_norm": 0.4865289628505707, "learning_rate": 5.521521521521522e-05, "loss": 1.5393, "step": 3622 }, { "epoch": 0.5326961955522882, "grad_norm": 0.6099830269813538, "learning_rate": 5.517517517517517e-05, "loss": 1.0077, "step": 3623 }, { "epoch": 0.532843227347914, "grad_norm": 0.6731157302856445, "learning_rate": 5.5135135135135144e-05, "loss": 1.1562, "step": 3624 }, { "epoch": 0.5329902591435398, "grad_norm": 0.6693265438079834, "learning_rate": 5.5095095095095095e-05, "loss": 0.9389, "step": 3625 }, { "epoch": 0.5331372909391656, "grad_norm": 0.7635179162025452, "learning_rate": 5.505505505505506e-05, "loss": 1.0864, "step": 3626 }, { "epoch": 0.5332843227347914, "grad_norm": 0.5837327837944031, "learning_rate": 5.5015015015015016e-05, "loss": 1.4156, "step": 3627 }, { "epoch": 0.5334313545304172, "grad_norm": 0.6503902077674866, "learning_rate": 5.497497497497498e-05, "loss": 1.257, "step": 3628 }, { "epoch": 0.533578386326043, "grad_norm": 0.9417732357978821, "learning_rate": 5.493493493493493e-05, "loss": 0.891, "step": 3629 }, { "epoch": 0.5337254181216688, "grad_norm": 0.8136228322982788, "learning_rate": 5.48948948948949e-05, "loss": 1.1347, "step": 3630 }, { "epoch": 0.5338724499172947, "grad_norm": 0.6868734955787659, "learning_rate": 5.485485485485485e-05, "loss": 1.1599, "step": 3631 }, { "epoch": 0.5340194817129205, "grad_norm": 0.6001800298690796, "learning_rate": 5.4814814814814817e-05, "loss": 1.058, "step": 3632 }, { "epoch": 0.5341665135085463, "grad_norm": 0.5587781667709351, "learning_rate": 5.4774774774774774e-05, "loss": 1.2103, "step": 3633 }, { "epoch": 0.5343135453041721, "grad_norm": 0.6720632910728455, "learning_rate": 5.473473473473474e-05, "loss": 1.4176, "step": 3634 }, { "epoch": 0.5344605770997979, "grad_norm": 0.6344841718673706, "learning_rate": 5.4694694694694695e-05, "loss": 1.0373, "step": 3635 }, { "epoch": 0.5346076088954236, "grad_norm": 0.7393881678581238, "learning_rate": 5.465465465465466e-05, "loss": 1.1405, "step": 3636 }, { "epoch": 0.5347546406910494, "grad_norm": 0.523049533367157, "learning_rate": 5.461461461461461e-05, "loss": 1.1098, "step": 3637 }, { "epoch": 0.5349016724866752, "grad_norm": 0.6577274203300476, "learning_rate": 5.4574574574574574e-05, "loss": 0.9411, "step": 3638 }, { "epoch": 0.535048704282301, "grad_norm": 0.6388079524040222, "learning_rate": 5.4534534534534545e-05, "loss": 0.8485, "step": 3639 }, { "epoch": 0.5351957360779268, "grad_norm": 0.4281010925769806, "learning_rate": 5.4494494494494496e-05, "loss": 1.4311, "step": 3640 }, { "epoch": 0.5353427678735526, "grad_norm": 0.5078662633895874, "learning_rate": 5.445445445445446e-05, "loss": 0.8098, "step": 3641 }, { "epoch": 0.5354897996691784, "grad_norm": 0.5689630508422852, "learning_rate": 5.441441441441442e-05, "loss": 1.1511, "step": 3642 }, { "epoch": 0.5356368314648042, "grad_norm": 0.6395360231399536, "learning_rate": 5.437437437437438e-05, "loss": 1.2601, "step": 3643 }, { "epoch": 0.53578386326043, "grad_norm": 0.5007621049880981, "learning_rate": 5.433433433433434e-05, "loss": 0.8029, "step": 3644 }, { "epoch": 0.5359308950560558, "grad_norm": 0.4694121181964874, "learning_rate": 5.42942942942943e-05, "loss": 0.9738, "step": 3645 }, { "epoch": 0.5360779268516817, "grad_norm": 0.7048468589782715, "learning_rate": 5.425425425425425e-05, "loss": 1.1029, "step": 3646 }, { "epoch": 0.5362249586473075, "grad_norm": 0.6925604343414307, "learning_rate": 5.421421421421422e-05, "loss": 1.2095, "step": 3647 }, { "epoch": 0.5363719904429333, "grad_norm": 0.8251738548278809, "learning_rate": 5.4174174174174175e-05, "loss": 1.0286, "step": 3648 }, { "epoch": 0.5365190222385591, "grad_norm": 0.772769033908844, "learning_rate": 5.413413413413414e-05, "loss": 1.1302, "step": 3649 }, { "epoch": 0.5366660540341849, "grad_norm": 0.7360385060310364, "learning_rate": 5.4094094094094096e-05, "loss": 0.9933, "step": 3650 }, { "epoch": 0.5368130858298107, "grad_norm": 0.6162272095680237, "learning_rate": 5.405405405405406e-05, "loss": 0.9435, "step": 3651 }, { "epoch": 0.5369601176254365, "grad_norm": 0.6425223350524902, "learning_rate": 5.401401401401401e-05, "loss": 1.2023, "step": 3652 }, { "epoch": 0.5371071494210623, "grad_norm": 0.631152868270874, "learning_rate": 5.397397397397398e-05, "loss": 1.0636, "step": 3653 }, { "epoch": 0.5372541812166881, "grad_norm": 0.6232051849365234, "learning_rate": 5.393393393393393e-05, "loss": 1.306, "step": 3654 }, { "epoch": 0.5374012130123139, "grad_norm": 0.7407537698745728, "learning_rate": 5.3893893893893896e-05, "loss": 1.1706, "step": 3655 }, { "epoch": 0.5375482448079397, "grad_norm": 0.6129488945007324, "learning_rate": 5.3853853853853854e-05, "loss": 1.2856, "step": 3656 }, { "epoch": 0.5376952766035655, "grad_norm": 0.6362156271934509, "learning_rate": 5.381381381381382e-05, "loss": 1.29, "step": 3657 }, { "epoch": 0.5378423083991913, "grad_norm": 0.5958859324455261, "learning_rate": 5.3773773773773775e-05, "loss": 1.2053, "step": 3658 }, { "epoch": 0.5379893401948171, "grad_norm": 0.5271769165992737, "learning_rate": 5.373373373373374e-05, "loss": 0.8543, "step": 3659 }, { "epoch": 0.538136371990443, "grad_norm": 0.4858749508857727, "learning_rate": 5.369369369369369e-05, "loss": 1.0393, "step": 3660 }, { "epoch": 0.5382834037860688, "grad_norm": 0.46139785647392273, "learning_rate": 5.3653653653653654e-05, "loss": 1.3295, "step": 3661 }, { "epoch": 0.5384304355816946, "grad_norm": 0.7746020555496216, "learning_rate": 5.361361361361361e-05, "loss": 1.1434, "step": 3662 }, { "epoch": 0.5385774673773204, "grad_norm": 0.7683274149894714, "learning_rate": 5.3573573573573576e-05, "loss": 0.9665, "step": 3663 }, { "epoch": 0.5387244991729462, "grad_norm": 0.5314139127731323, "learning_rate": 5.353353353353353e-05, "loss": 1.0107, "step": 3664 }, { "epoch": 0.538871530968572, "grad_norm": 0.49485889077186584, "learning_rate": 5.34934934934935e-05, "loss": 1.5926, "step": 3665 }, { "epoch": 0.5390185627641978, "grad_norm": 0.5833888649940491, "learning_rate": 5.345345345345346e-05, "loss": 0.9676, "step": 3666 }, { "epoch": 0.5391655945598236, "grad_norm": 0.6711292266845703, "learning_rate": 5.341341341341342e-05, "loss": 1.3584, "step": 3667 }, { "epoch": 0.5393126263554494, "grad_norm": 0.5987687706947327, "learning_rate": 5.337337337337338e-05, "loss": 1.0446, "step": 3668 }, { "epoch": 0.5394596581510752, "grad_norm": 0.6844998598098755, "learning_rate": 5.333333333333333e-05, "loss": 1.3498, "step": 3669 }, { "epoch": 0.539606689946701, "grad_norm": 0.4995625913143158, "learning_rate": 5.32932932932933e-05, "loss": 1.0302, "step": 3670 }, { "epoch": 0.5397537217423268, "grad_norm": 0.6835212707519531, "learning_rate": 5.3253253253253255e-05, "loss": 1.1672, "step": 3671 }, { "epoch": 0.5399007535379525, "grad_norm": 0.5624462366104126, "learning_rate": 5.321321321321322e-05, "loss": 1.1048, "step": 3672 }, { "epoch": 0.5400477853335783, "grad_norm": 0.5400373935699463, "learning_rate": 5.3173173173173176e-05, "loss": 1.0529, "step": 3673 }, { "epoch": 0.5401948171292041, "grad_norm": 0.5739070773124695, "learning_rate": 5.313313313313314e-05, "loss": 1.056, "step": 3674 }, { "epoch": 0.54034184892483, "grad_norm": 0.5308019518852234, "learning_rate": 5.309309309309309e-05, "loss": 1.593, "step": 3675 }, { "epoch": 0.5404888807204558, "grad_norm": 0.7184430956840515, "learning_rate": 5.305305305305306e-05, "loss": 0.9937, "step": 3676 }, { "epoch": 0.5406359125160816, "grad_norm": 0.43543827533721924, "learning_rate": 5.301301301301301e-05, "loss": 1.3133, "step": 3677 }, { "epoch": 0.5407829443117074, "grad_norm": 0.5930517911911011, "learning_rate": 5.2972972972972976e-05, "loss": 1.0963, "step": 3678 }, { "epoch": 0.5409299761073332, "grad_norm": 0.41752052307128906, "learning_rate": 5.2932932932932934e-05, "loss": 1.9143, "step": 3679 }, { "epoch": 0.541077007902959, "grad_norm": 0.6985964775085449, "learning_rate": 5.28928928928929e-05, "loss": 1.0612, "step": 3680 }, { "epoch": 0.5412240396985848, "grad_norm": 0.7277359962463379, "learning_rate": 5.2852852852852855e-05, "loss": 0.9906, "step": 3681 }, { "epoch": 0.5413710714942106, "grad_norm": 0.6157042980194092, "learning_rate": 5.281281281281282e-05, "loss": 1.1903, "step": 3682 }, { "epoch": 0.5415181032898364, "grad_norm": 0.6057680249214172, "learning_rate": 5.277277277277277e-05, "loss": 1.2368, "step": 3683 }, { "epoch": 0.5416651350854622, "grad_norm": 0.535791277885437, "learning_rate": 5.2732732732732734e-05, "loss": 1.0364, "step": 3684 }, { "epoch": 0.541812166881088, "grad_norm": 0.579521894454956, "learning_rate": 5.269269269269269e-05, "loss": 1.0288, "step": 3685 }, { "epoch": 0.5419591986767138, "grad_norm": 0.5941701531410217, "learning_rate": 5.2652652652652655e-05, "loss": 1.3169, "step": 3686 }, { "epoch": 0.5421062304723396, "grad_norm": 0.6428045034408569, "learning_rate": 5.261261261261261e-05, "loss": 1.1807, "step": 3687 }, { "epoch": 0.5422532622679654, "grad_norm": 0.4917246401309967, "learning_rate": 5.257257257257258e-05, "loss": 1.5602, "step": 3688 }, { "epoch": 0.5424002940635912, "grad_norm": 0.6993679404258728, "learning_rate": 5.253253253253253e-05, "loss": 0.9518, "step": 3689 }, { "epoch": 0.5425473258592171, "grad_norm": 0.5685787200927734, "learning_rate": 5.24924924924925e-05, "loss": 1.4174, "step": 3690 }, { "epoch": 0.5426943576548429, "grad_norm": 0.6392562985420227, "learning_rate": 5.245245245245245e-05, "loss": 1.1858, "step": 3691 }, { "epoch": 0.5428413894504687, "grad_norm": 0.7405918836593628, "learning_rate": 5.241241241241241e-05, "loss": 1.0434, "step": 3692 }, { "epoch": 0.5429884212460945, "grad_norm": 0.6903685331344604, "learning_rate": 5.237237237237238e-05, "loss": 1.0692, "step": 3693 }, { "epoch": 0.5431354530417203, "grad_norm": 0.5000661015510559, "learning_rate": 5.2332332332332335e-05, "loss": 0.8141, "step": 3694 }, { "epoch": 0.5432824848373461, "grad_norm": 0.5608515739440918, "learning_rate": 5.22922922922923e-05, "loss": 1.4224, "step": 3695 }, { "epoch": 0.5434295166329719, "grad_norm": 0.5853215456008911, "learning_rate": 5.2252252252252256e-05, "loss": 1.401, "step": 3696 }, { "epoch": 0.5435765484285977, "grad_norm": 0.7185789942741394, "learning_rate": 5.221221221221222e-05, "loss": 1.0465, "step": 3697 }, { "epoch": 0.5437235802242235, "grad_norm": 0.7161624431610107, "learning_rate": 5.217217217217217e-05, "loss": 0.9226, "step": 3698 }, { "epoch": 0.5438706120198493, "grad_norm": 0.6661717295646667, "learning_rate": 5.213213213213214e-05, "loss": 0.9748, "step": 3699 }, { "epoch": 0.5440176438154751, "grad_norm": 0.45719513297080994, "learning_rate": 5.209209209209209e-05, "loss": 1.4699, "step": 3700 }, { "epoch": 0.5441646756111009, "grad_norm": 0.43497124314308167, "learning_rate": 5.2052052052052056e-05, "loss": 1.2144, "step": 3701 }, { "epoch": 0.5443117074067267, "grad_norm": 0.7604093551635742, "learning_rate": 5.2012012012012014e-05, "loss": 1.1291, "step": 3702 }, { "epoch": 0.5444587392023525, "grad_norm": 0.513003945350647, "learning_rate": 5.197197197197198e-05, "loss": 0.9108, "step": 3703 }, { "epoch": 0.5446057709979784, "grad_norm": 0.5354130864143372, "learning_rate": 5.1931931931931935e-05, "loss": 0.9468, "step": 3704 }, { "epoch": 0.5447528027936042, "grad_norm": 0.7326198220252991, "learning_rate": 5.18918918918919e-05, "loss": 0.9616, "step": 3705 }, { "epoch": 0.54489983458923, "grad_norm": 0.6111709475517273, "learning_rate": 5.185185185185185e-05, "loss": 1.3487, "step": 3706 }, { "epoch": 0.5450468663848558, "grad_norm": 0.5276011824607849, "learning_rate": 5.1811811811811814e-05, "loss": 1.2162, "step": 3707 }, { "epoch": 0.5451938981804816, "grad_norm": 0.9549554586410522, "learning_rate": 5.177177177177177e-05, "loss": 1.0642, "step": 3708 }, { "epoch": 0.5453409299761073, "grad_norm": 0.7314489483833313, "learning_rate": 5.1731731731731735e-05, "loss": 0.968, "step": 3709 }, { "epoch": 0.5454879617717331, "grad_norm": 0.7657334208488464, "learning_rate": 5.169169169169169e-05, "loss": 1.1486, "step": 3710 }, { "epoch": 0.5456349935673589, "grad_norm": 0.7441298365592957, "learning_rate": 5.165165165165166e-05, "loss": 0.8303, "step": 3711 }, { "epoch": 0.5457820253629847, "grad_norm": 0.616892397403717, "learning_rate": 5.161161161161161e-05, "loss": 1.2614, "step": 3712 }, { "epoch": 0.5459290571586105, "grad_norm": 0.7286609411239624, "learning_rate": 5.157157157157158e-05, "loss": 1.0315, "step": 3713 }, { "epoch": 0.5460760889542363, "grad_norm": 0.6800589561462402, "learning_rate": 5.153153153153153e-05, "loss": 1.0183, "step": 3714 }, { "epoch": 0.5462231207498621, "grad_norm": 0.5246696472167969, "learning_rate": 5.149149149149149e-05, "loss": 1.1652, "step": 3715 }, { "epoch": 0.5463701525454879, "grad_norm": 0.5831330418586731, "learning_rate": 5.145145145145145e-05, "loss": 1.4104, "step": 3716 }, { "epoch": 0.5465171843411137, "grad_norm": 0.6538448333740234, "learning_rate": 5.1411411411411414e-05, "loss": 1.2804, "step": 3717 }, { "epoch": 0.5466642161367395, "grad_norm": 0.589469850063324, "learning_rate": 5.137137137137137e-05, "loss": 1.1045, "step": 3718 }, { "epoch": 0.5468112479323654, "grad_norm": 0.43500852584838867, "learning_rate": 5.1331331331331336e-05, "loss": 1.4984, "step": 3719 }, { "epoch": 0.5469582797279912, "grad_norm": 0.5234942436218262, "learning_rate": 5.12912912912913e-05, "loss": 1.4792, "step": 3720 }, { "epoch": 0.547105311523617, "grad_norm": 0.6722154021263123, "learning_rate": 5.125125125125125e-05, "loss": 1.0941, "step": 3721 }, { "epoch": 0.5472523433192428, "grad_norm": 0.6606892347335815, "learning_rate": 5.121121121121122e-05, "loss": 1.1557, "step": 3722 }, { "epoch": 0.5473993751148686, "grad_norm": 0.5803148746490479, "learning_rate": 5.117117117117117e-05, "loss": 0.9343, "step": 3723 }, { "epoch": 0.5475464069104944, "grad_norm": 0.5074203610420227, "learning_rate": 5.1131131131131136e-05, "loss": 1.5816, "step": 3724 }, { "epoch": 0.5476934387061202, "grad_norm": 0.7253628969192505, "learning_rate": 5.1091091091091094e-05, "loss": 1.3639, "step": 3725 }, { "epoch": 0.547840470501746, "grad_norm": 0.5617417097091675, "learning_rate": 5.105105105105106e-05, "loss": 1.193, "step": 3726 }, { "epoch": 0.5479875022973718, "grad_norm": 0.46824169158935547, "learning_rate": 5.1011011011011015e-05, "loss": 1.4117, "step": 3727 }, { "epoch": 0.5481345340929976, "grad_norm": 0.5868620872497559, "learning_rate": 5.097097097097098e-05, "loss": 1.0127, "step": 3728 }, { "epoch": 0.5482815658886234, "grad_norm": 0.6855554580688477, "learning_rate": 5.093093093093093e-05, "loss": 1.0818, "step": 3729 }, { "epoch": 0.5484285976842492, "grad_norm": 0.8185373544692993, "learning_rate": 5.0890890890890894e-05, "loss": 1.0238, "step": 3730 }, { "epoch": 0.548575629479875, "grad_norm": 0.49996230006217957, "learning_rate": 5.085085085085085e-05, "loss": 1.2439, "step": 3731 }, { "epoch": 0.5487226612755008, "grad_norm": 0.467221736907959, "learning_rate": 5.0810810810810815e-05, "loss": 1.3249, "step": 3732 }, { "epoch": 0.5488696930711267, "grad_norm": 0.5610997676849365, "learning_rate": 5.077077077077077e-05, "loss": 1.3056, "step": 3733 }, { "epoch": 0.5490167248667525, "grad_norm": 0.49061813950538635, "learning_rate": 5.073073073073074e-05, "loss": 1.3334, "step": 3734 }, { "epoch": 0.5491637566623783, "grad_norm": 0.796135663986206, "learning_rate": 5.069069069069069e-05, "loss": 0.9713, "step": 3735 }, { "epoch": 0.5493107884580041, "grad_norm": 0.4571463167667389, "learning_rate": 5.065065065065066e-05, "loss": 1.3144, "step": 3736 }, { "epoch": 0.5494578202536299, "grad_norm": 0.6138120889663696, "learning_rate": 5.061061061061061e-05, "loss": 1.1166, "step": 3737 }, { "epoch": 0.5496048520492557, "grad_norm": 0.4791252315044403, "learning_rate": 5.057057057057057e-05, "loss": 1.3965, "step": 3738 }, { "epoch": 0.5497518838448815, "grad_norm": 0.5932607054710388, "learning_rate": 5.053053053053053e-05, "loss": 1.3505, "step": 3739 }, { "epoch": 0.5498989156405073, "grad_norm": 0.6665881276130676, "learning_rate": 5.0490490490490494e-05, "loss": 1.1372, "step": 3740 }, { "epoch": 0.5500459474361331, "grad_norm": 0.6763858199119568, "learning_rate": 5.0450450450450445e-05, "loss": 1.1272, "step": 3741 }, { "epoch": 0.5501929792317589, "grad_norm": 0.5301457643508911, "learning_rate": 5.0410410410410416e-05, "loss": 1.1393, "step": 3742 }, { "epoch": 0.5503400110273847, "grad_norm": 0.48861944675445557, "learning_rate": 5.0370370370370366e-05, "loss": 0.9624, "step": 3743 }, { "epoch": 0.5504870428230105, "grad_norm": 0.6424450874328613, "learning_rate": 5.033033033033033e-05, "loss": 1.2923, "step": 3744 }, { "epoch": 0.5506340746186362, "grad_norm": 0.7346069812774658, "learning_rate": 5.029029029029029e-05, "loss": 0.9971, "step": 3745 }, { "epoch": 0.550781106414262, "grad_norm": 0.6363334059715271, "learning_rate": 5.025025025025025e-05, "loss": 1.0148, "step": 3746 }, { "epoch": 0.5509281382098878, "grad_norm": 0.5275687575340271, "learning_rate": 5.0210210210210216e-05, "loss": 1.6772, "step": 3747 }, { "epoch": 0.5510751700055136, "grad_norm": 0.8487553596496582, "learning_rate": 5.0170170170170174e-05, "loss": 0.8251, "step": 3748 }, { "epoch": 0.5512222018011395, "grad_norm": 0.5274917483329773, "learning_rate": 5.013013013013014e-05, "loss": 1.2346, "step": 3749 }, { "epoch": 0.5513692335967653, "grad_norm": 0.6556288599967957, "learning_rate": 5.009009009009009e-05, "loss": 0.8419, "step": 3750 }, { "epoch": 0.5515162653923911, "grad_norm": 0.4074132442474365, "learning_rate": 5.005005005005006e-05, "loss": 1.3268, "step": 3751 }, { "epoch": 0.5516632971880169, "grad_norm": 0.5399186015129089, "learning_rate": 5.001001001001001e-05, "loss": 1.5787, "step": 3752 }, { "epoch": 0.5518103289836427, "grad_norm": 0.6955708861351013, "learning_rate": 4.9969969969969974e-05, "loss": 0.8627, "step": 3753 }, { "epoch": 0.5519573607792685, "grad_norm": 0.7642090916633606, "learning_rate": 4.992992992992993e-05, "loss": 1.0133, "step": 3754 }, { "epoch": 0.5521043925748943, "grad_norm": 0.6580532193183899, "learning_rate": 4.988988988988989e-05, "loss": 1.1251, "step": 3755 }, { "epoch": 0.5522514243705201, "grad_norm": 0.6795801520347595, "learning_rate": 4.984984984984985e-05, "loss": 0.8544, "step": 3756 }, { "epoch": 0.5523984561661459, "grad_norm": 0.6155956983566284, "learning_rate": 4.980980980980981e-05, "loss": 1.1958, "step": 3757 }, { "epoch": 0.5525454879617717, "grad_norm": 0.5719667077064514, "learning_rate": 4.976976976976977e-05, "loss": 1.4269, "step": 3758 }, { "epoch": 0.5526925197573975, "grad_norm": 0.6304296851158142, "learning_rate": 4.972972972972974e-05, "loss": 1.3494, "step": 3759 }, { "epoch": 0.5528395515530233, "grad_norm": 0.42851972579956055, "learning_rate": 4.9689689689689696e-05, "loss": 1.7649, "step": 3760 }, { "epoch": 0.5529865833486491, "grad_norm": 0.7069100737571716, "learning_rate": 4.964964964964965e-05, "loss": 1.1892, "step": 3761 }, { "epoch": 0.553133615144275, "grad_norm": 0.6985968351364136, "learning_rate": 4.960960960960962e-05, "loss": 0.9603, "step": 3762 }, { "epoch": 0.5532806469399008, "grad_norm": 0.44447994232177734, "learning_rate": 4.9569569569569574e-05, "loss": 1.2104, "step": 3763 }, { "epoch": 0.5534276787355266, "grad_norm": 0.5665764212608337, "learning_rate": 4.952952952952953e-05, "loss": 1.2559, "step": 3764 }, { "epoch": 0.5535747105311524, "grad_norm": 0.3997505009174347, "learning_rate": 4.9489489489489496e-05, "loss": 0.9036, "step": 3765 }, { "epoch": 0.5537217423267782, "grad_norm": 0.4906400740146637, "learning_rate": 4.944944944944945e-05, "loss": 1.7412, "step": 3766 }, { "epoch": 0.553868774122404, "grad_norm": 0.48649805784225464, "learning_rate": 4.940940940940941e-05, "loss": 1.4611, "step": 3767 }, { "epoch": 0.5540158059180298, "grad_norm": 0.4913535714149475, "learning_rate": 4.9369369369369375e-05, "loss": 1.1123, "step": 3768 }, { "epoch": 0.5541628377136556, "grad_norm": 0.6994661092758179, "learning_rate": 4.932932932932933e-05, "loss": 1.0998, "step": 3769 }, { "epoch": 0.5543098695092814, "grad_norm": 0.5046922564506531, "learning_rate": 4.928928928928929e-05, "loss": 1.1256, "step": 3770 }, { "epoch": 0.5544569013049072, "grad_norm": 0.6988959908485413, "learning_rate": 4.9249249249249253e-05, "loss": 1.0988, "step": 3771 }, { "epoch": 0.554603933100533, "grad_norm": 0.5125321745872498, "learning_rate": 4.920920920920921e-05, "loss": 1.2074, "step": 3772 }, { "epoch": 0.5547509648961588, "grad_norm": 0.6693763732910156, "learning_rate": 4.916916916916917e-05, "loss": 1.1539, "step": 3773 }, { "epoch": 0.5548979966917846, "grad_norm": 0.6703200936317444, "learning_rate": 4.912912912912913e-05, "loss": 1.0163, "step": 3774 }, { "epoch": 0.5550450284874104, "grad_norm": 0.6866796016693115, "learning_rate": 4.908908908908909e-05, "loss": 1.2047, "step": 3775 }, { "epoch": 0.5551920602830362, "grad_norm": 0.730365514755249, "learning_rate": 4.9049049049049054e-05, "loss": 1.1792, "step": 3776 }, { "epoch": 0.555339092078662, "grad_norm": 0.695371150970459, "learning_rate": 4.900900900900901e-05, "loss": 0.8183, "step": 3777 }, { "epoch": 0.5554861238742879, "grad_norm": 0.6250258088111877, "learning_rate": 4.896896896896897e-05, "loss": 1.3401, "step": 3778 }, { "epoch": 0.5556331556699137, "grad_norm": 0.5506641268730164, "learning_rate": 4.892892892892893e-05, "loss": 1.1052, "step": 3779 }, { "epoch": 0.5557801874655395, "grad_norm": 0.6735824346542358, "learning_rate": 4.888888888888889e-05, "loss": 1.1606, "step": 3780 }, { "epoch": 0.5559272192611652, "grad_norm": 0.7568615078926086, "learning_rate": 4.884884884884885e-05, "loss": 0.776, "step": 3781 }, { "epoch": 0.556074251056791, "grad_norm": 0.537862241268158, "learning_rate": 4.880880880880881e-05, "loss": 1.1431, "step": 3782 }, { "epoch": 0.5562212828524168, "grad_norm": 0.5762971639633179, "learning_rate": 4.876876876876877e-05, "loss": 1.1, "step": 3783 }, { "epoch": 0.5563683146480426, "grad_norm": 0.47579696774482727, "learning_rate": 4.8728728728728726e-05, "loss": 1.2878, "step": 3784 }, { "epoch": 0.5565153464436684, "grad_norm": 0.5667315125465393, "learning_rate": 4.868868868868869e-05, "loss": 0.8164, "step": 3785 }, { "epoch": 0.5566623782392942, "grad_norm": 0.6163331270217896, "learning_rate": 4.8648648648648654e-05, "loss": 1.0392, "step": 3786 }, { "epoch": 0.55680941003492, "grad_norm": 0.4479523301124573, "learning_rate": 4.860860860860861e-05, "loss": 1.3581, "step": 3787 }, { "epoch": 0.5569564418305458, "grad_norm": 0.4151988625526428, "learning_rate": 4.8568568568568576e-05, "loss": 1.2181, "step": 3788 }, { "epoch": 0.5571034736261716, "grad_norm": 0.7238032817840576, "learning_rate": 4.852852852852853e-05, "loss": 1.0502, "step": 3789 }, { "epoch": 0.5572505054217974, "grad_norm": 0.5873985886573792, "learning_rate": 4.848848848848849e-05, "loss": 0.8733, "step": 3790 }, { "epoch": 0.5573975372174232, "grad_norm": 0.7357854843139648, "learning_rate": 4.8448448448448455e-05, "loss": 0.9068, "step": 3791 }, { "epoch": 0.557544569013049, "grad_norm": 0.5277373790740967, "learning_rate": 4.840840840840841e-05, "loss": 1.2257, "step": 3792 }, { "epoch": 0.5576916008086749, "grad_norm": 0.43599599599838257, "learning_rate": 4.836836836836837e-05, "loss": 1.4243, "step": 3793 }, { "epoch": 0.5578386326043007, "grad_norm": 0.5330475568771362, "learning_rate": 4.832832832832833e-05, "loss": 1.1973, "step": 3794 }, { "epoch": 0.5579856643999265, "grad_norm": 0.5059161186218262, "learning_rate": 4.828828828828829e-05, "loss": 1.0269, "step": 3795 }, { "epoch": 0.5581326961955523, "grad_norm": 0.7007160186767578, "learning_rate": 4.824824824824825e-05, "loss": 1.2014, "step": 3796 }, { "epoch": 0.5582797279911781, "grad_norm": 0.423744261264801, "learning_rate": 4.820820820820821e-05, "loss": 1.1235, "step": 3797 }, { "epoch": 0.5584267597868039, "grad_norm": 0.6512206792831421, "learning_rate": 4.816816816816817e-05, "loss": 1.0392, "step": 3798 }, { "epoch": 0.5585737915824297, "grad_norm": 0.5817866921424866, "learning_rate": 4.8128128128128134e-05, "loss": 1.0651, "step": 3799 }, { "epoch": 0.5587208233780555, "grad_norm": 0.48559629917144775, "learning_rate": 4.808808808808809e-05, "loss": 1.0004, "step": 3800 }, { "epoch": 0.5588678551736813, "grad_norm": 0.4517654776573181, "learning_rate": 4.804804804804805e-05, "loss": 1.4038, "step": 3801 }, { "epoch": 0.5590148869693071, "grad_norm": 0.6598575711250305, "learning_rate": 4.800800800800801e-05, "loss": 1.0929, "step": 3802 }, { "epoch": 0.5591619187649329, "grad_norm": 0.673992395401001, "learning_rate": 4.796796796796797e-05, "loss": 1.012, "step": 3803 }, { "epoch": 0.5593089505605587, "grad_norm": 0.49867865443229675, "learning_rate": 4.792792792792793e-05, "loss": 1.2178, "step": 3804 }, { "epoch": 0.5594559823561845, "grad_norm": 0.5446364879608154, "learning_rate": 4.788788788788789e-05, "loss": 1.3521, "step": 3805 }, { "epoch": 0.5596030141518104, "grad_norm": 0.6378092169761658, "learning_rate": 4.784784784784785e-05, "loss": 0.677, "step": 3806 }, { "epoch": 0.5597500459474362, "grad_norm": 0.49180129170417786, "learning_rate": 4.7807807807807806e-05, "loss": 1.0642, "step": 3807 }, { "epoch": 0.559897077743062, "grad_norm": 0.49660155177116394, "learning_rate": 4.776776776776777e-05, "loss": 1.5257, "step": 3808 }, { "epoch": 0.5600441095386878, "grad_norm": 0.5603529214859009, "learning_rate": 4.772772772772773e-05, "loss": 1.3782, "step": 3809 }, { "epoch": 0.5601911413343136, "grad_norm": 0.7489359378814697, "learning_rate": 4.7687687687687685e-05, "loss": 0.9372, "step": 3810 }, { "epoch": 0.5603381731299394, "grad_norm": 0.6636098027229309, "learning_rate": 4.764764764764765e-05, "loss": 1.0375, "step": 3811 }, { "epoch": 0.5604852049255652, "grad_norm": 0.515703558921814, "learning_rate": 4.7607607607607606e-05, "loss": 1.1591, "step": 3812 }, { "epoch": 0.560632236721191, "grad_norm": 0.590718686580658, "learning_rate": 4.756756756756757e-05, "loss": 1.3893, "step": 3813 }, { "epoch": 0.5607792685168168, "grad_norm": 0.6929574608802795, "learning_rate": 4.7527527527527534e-05, "loss": 1.15, "step": 3814 }, { "epoch": 0.5609263003124426, "grad_norm": 0.6283215880393982, "learning_rate": 4.748748748748749e-05, "loss": 1.1669, "step": 3815 }, { "epoch": 0.5610733321080684, "grad_norm": 0.5141939520835876, "learning_rate": 4.744744744744745e-05, "loss": 1.5575, "step": 3816 }, { "epoch": 0.5612203639036941, "grad_norm": 0.5928391218185425, "learning_rate": 4.740740740740741e-05, "loss": 1.2706, "step": 3817 }, { "epoch": 0.5613673956993199, "grad_norm": 0.49608710408210754, "learning_rate": 4.736736736736737e-05, "loss": 1.1295, "step": 3818 }, { "epoch": 0.5615144274949457, "grad_norm": 0.5997463464736938, "learning_rate": 4.732732732732733e-05, "loss": 1.1384, "step": 3819 }, { "epoch": 0.5616614592905715, "grad_norm": 0.7574917078018188, "learning_rate": 4.728728728728729e-05, "loss": 1.1293, "step": 3820 }, { "epoch": 0.5618084910861973, "grad_norm": 0.8185042142868042, "learning_rate": 4.724724724724725e-05, "loss": 1.1377, "step": 3821 }, { "epoch": 0.5619555228818232, "grad_norm": 0.5406460762023926, "learning_rate": 4.7207207207207214e-05, "loss": 1.6938, "step": 3822 }, { "epoch": 0.562102554677449, "grad_norm": 0.4371257722377777, "learning_rate": 4.716716716716717e-05, "loss": 1.1615, "step": 3823 }, { "epoch": 0.5622495864730748, "grad_norm": 0.46965792775154114, "learning_rate": 4.712712712712713e-05, "loss": 1.2726, "step": 3824 }, { "epoch": 0.5623966182687006, "grad_norm": 0.8558904528617859, "learning_rate": 4.708708708708709e-05, "loss": 0.947, "step": 3825 }, { "epoch": 0.5625436500643264, "grad_norm": 0.5675462484359741, "learning_rate": 4.704704704704705e-05, "loss": 1.1069, "step": 3826 }, { "epoch": 0.5626906818599522, "grad_norm": 0.5614242553710938, "learning_rate": 4.700700700700701e-05, "loss": 0.8843, "step": 3827 }, { "epoch": 0.562837713655578, "grad_norm": 0.692995548248291, "learning_rate": 4.696696696696697e-05, "loss": 1.0519, "step": 3828 }, { "epoch": 0.5629847454512038, "grad_norm": 0.6384310722351074, "learning_rate": 4.692692692692693e-05, "loss": 1.2053, "step": 3829 }, { "epoch": 0.5631317772468296, "grad_norm": 0.47829052805900574, "learning_rate": 4.6886886886886886e-05, "loss": 1.0826, "step": 3830 }, { "epoch": 0.5632788090424554, "grad_norm": 0.5681575536727905, "learning_rate": 4.684684684684685e-05, "loss": 1.0808, "step": 3831 }, { "epoch": 0.5634258408380812, "grad_norm": 0.5397534370422363, "learning_rate": 4.680680680680681e-05, "loss": 1.411, "step": 3832 }, { "epoch": 0.563572872633707, "grad_norm": 0.5977259278297424, "learning_rate": 4.6766766766766765e-05, "loss": 0.9579, "step": 3833 }, { "epoch": 0.5637199044293328, "grad_norm": 0.4809923470020294, "learning_rate": 4.672672672672673e-05, "loss": 1.2143, "step": 3834 }, { "epoch": 0.5638669362249586, "grad_norm": 0.5989115834236145, "learning_rate": 4.6686686686686686e-05, "loss": 0.9787, "step": 3835 }, { "epoch": 0.5640139680205845, "grad_norm": 0.7424130439758301, "learning_rate": 4.6646646646646644e-05, "loss": 1.2069, "step": 3836 }, { "epoch": 0.5641609998162103, "grad_norm": 0.5582441091537476, "learning_rate": 4.660660660660661e-05, "loss": 1.0273, "step": 3837 }, { "epoch": 0.5643080316118361, "grad_norm": 0.4362504780292511, "learning_rate": 4.6566566566566565e-05, "loss": 1.598, "step": 3838 }, { "epoch": 0.5644550634074619, "grad_norm": 0.8223304748535156, "learning_rate": 4.652652652652653e-05, "loss": 1.0172, "step": 3839 }, { "epoch": 0.5646020952030877, "grad_norm": 0.5276161432266235, "learning_rate": 4.648648648648649e-05, "loss": 1.1145, "step": 3840 }, { "epoch": 0.5647491269987135, "grad_norm": 0.5889221429824829, "learning_rate": 4.644644644644645e-05, "loss": 1.1555, "step": 3841 }, { "epoch": 0.5648961587943393, "grad_norm": 0.5242165327072144, "learning_rate": 4.640640640640641e-05, "loss": 1.116, "step": 3842 }, { "epoch": 0.5650431905899651, "grad_norm": 0.5727576017379761, "learning_rate": 4.636636636636637e-05, "loss": 1.576, "step": 3843 }, { "epoch": 0.5651902223855909, "grad_norm": 0.34849581122398376, "learning_rate": 4.632632632632633e-05, "loss": 1.5351, "step": 3844 }, { "epoch": 0.5653372541812167, "grad_norm": 0.8393445014953613, "learning_rate": 4.628628628628629e-05, "loss": 1.0148, "step": 3845 }, { "epoch": 0.5654842859768425, "grad_norm": 0.5913763642311096, "learning_rate": 4.624624624624625e-05, "loss": 0.9048, "step": 3846 }, { "epoch": 0.5656313177724683, "grad_norm": 0.7463865876197815, "learning_rate": 4.620620620620621e-05, "loss": 0.9406, "step": 3847 }, { "epoch": 0.5657783495680941, "grad_norm": 0.7274550795555115, "learning_rate": 4.616616616616617e-05, "loss": 1.2738, "step": 3848 }, { "epoch": 0.56592538136372, "grad_norm": 0.7757299542427063, "learning_rate": 4.612612612612613e-05, "loss": 1.0664, "step": 3849 }, { "epoch": 0.5660724131593458, "grad_norm": 0.6726906895637512, "learning_rate": 4.608608608608609e-05, "loss": 1.196, "step": 3850 }, { "epoch": 0.5662194449549716, "grad_norm": 0.48007312417030334, "learning_rate": 4.604604604604605e-05, "loss": 1.3003, "step": 3851 }, { "epoch": 0.5663664767505974, "grad_norm": 0.5004801750183105, "learning_rate": 4.600600600600601e-05, "loss": 1.6437, "step": 3852 }, { "epoch": 0.5665135085462231, "grad_norm": 0.5643466114997864, "learning_rate": 4.5965965965965966e-05, "loss": 1.4893, "step": 3853 }, { "epoch": 0.5666605403418489, "grad_norm": 0.6125984787940979, "learning_rate": 4.592592592592593e-05, "loss": 1.0793, "step": 3854 }, { "epoch": 0.5668075721374747, "grad_norm": 0.6897003650665283, "learning_rate": 4.588588588588589e-05, "loss": 0.821, "step": 3855 }, { "epoch": 0.5669546039331005, "grad_norm": 0.6814178824424744, "learning_rate": 4.5845845845845845e-05, "loss": 1.0918, "step": 3856 }, { "epoch": 0.5671016357287263, "grad_norm": 0.6035454273223877, "learning_rate": 4.580580580580581e-05, "loss": 1.2033, "step": 3857 }, { "epoch": 0.5672486675243521, "grad_norm": 0.8500722050666809, "learning_rate": 4.5765765765765766e-05, "loss": 0.9182, "step": 3858 }, { "epoch": 0.5673956993199779, "grad_norm": 0.5306290984153748, "learning_rate": 4.5725725725725723e-05, "loss": 1.8402, "step": 3859 }, { "epoch": 0.5675427311156037, "grad_norm": 0.487582802772522, "learning_rate": 4.568568568568569e-05, "loss": 1.1744, "step": 3860 }, { "epoch": 0.5676897629112295, "grad_norm": 0.6044211387634277, "learning_rate": 4.5645645645645645e-05, "loss": 0.9733, "step": 3861 }, { "epoch": 0.5678367947068553, "grad_norm": 0.6591529250144958, "learning_rate": 4.560560560560561e-05, "loss": 0.5781, "step": 3862 }, { "epoch": 0.5679838265024811, "grad_norm": 0.6897171139717102, "learning_rate": 4.5565565565565566e-05, "loss": 1.2583, "step": 3863 }, { "epoch": 0.5681308582981069, "grad_norm": 0.4959266781806946, "learning_rate": 4.5525525525525524e-05, "loss": 0.9335, "step": 3864 }, { "epoch": 0.5682778900937328, "grad_norm": 0.5995336174964905, "learning_rate": 4.548548548548549e-05, "loss": 1.2789, "step": 3865 }, { "epoch": 0.5684249218893586, "grad_norm": 0.5895504951477051, "learning_rate": 4.544544544544545e-05, "loss": 0.8974, "step": 3866 }, { "epoch": 0.5685719536849844, "grad_norm": 0.43526363372802734, "learning_rate": 4.540540540540541e-05, "loss": 1.5736, "step": 3867 }, { "epoch": 0.5687189854806102, "grad_norm": 0.6289941668510437, "learning_rate": 4.536536536536537e-05, "loss": 1.2204, "step": 3868 }, { "epoch": 0.568866017276236, "grad_norm": 0.560651421546936, "learning_rate": 4.532532532532533e-05, "loss": 0.9694, "step": 3869 }, { "epoch": 0.5690130490718618, "grad_norm": 0.6546974182128906, "learning_rate": 4.528528528528529e-05, "loss": 1.0803, "step": 3870 }, { "epoch": 0.5691600808674876, "grad_norm": 0.5679540038108826, "learning_rate": 4.524524524524525e-05, "loss": 1.5675, "step": 3871 }, { "epoch": 0.5693071126631134, "grad_norm": 0.812063455581665, "learning_rate": 4.520520520520521e-05, "loss": 1.173, "step": 3872 }, { "epoch": 0.5694541444587392, "grad_norm": 0.5996072292327881, "learning_rate": 4.516516516516517e-05, "loss": 1.1716, "step": 3873 }, { "epoch": 0.569601176254365, "grad_norm": 0.6152629852294922, "learning_rate": 4.512512512512513e-05, "loss": 1.2264, "step": 3874 }, { "epoch": 0.5697482080499908, "grad_norm": 0.8174483776092529, "learning_rate": 4.508508508508509e-05, "loss": 1.3019, "step": 3875 }, { "epoch": 0.5698952398456166, "grad_norm": 0.49028369784355164, "learning_rate": 4.5045045045045046e-05, "loss": 1.3535, "step": 3876 }, { "epoch": 0.5700422716412424, "grad_norm": 0.46497493982315063, "learning_rate": 4.500500500500501e-05, "loss": 1.0748, "step": 3877 }, { "epoch": 0.5701893034368682, "grad_norm": 0.7332792282104492, "learning_rate": 4.496496496496497e-05, "loss": 1.3276, "step": 3878 }, { "epoch": 0.570336335232494, "grad_norm": 0.5562291741371155, "learning_rate": 4.4924924924924925e-05, "loss": 1.1905, "step": 3879 }, { "epoch": 0.5704833670281199, "grad_norm": 0.5178377032279968, "learning_rate": 4.488488488488489e-05, "loss": 1.7338, "step": 3880 }, { "epoch": 0.5706303988237457, "grad_norm": 0.578823983669281, "learning_rate": 4.4844844844844846e-05, "loss": 1.1516, "step": 3881 }, { "epoch": 0.5707774306193715, "grad_norm": 0.46680113673210144, "learning_rate": 4.48048048048048e-05, "loss": 1.7533, "step": 3882 }, { "epoch": 0.5709244624149973, "grad_norm": 0.5215994119644165, "learning_rate": 4.476476476476477e-05, "loss": 1.1087, "step": 3883 }, { "epoch": 0.5710714942106231, "grad_norm": 0.5072968006134033, "learning_rate": 4.4724724724724725e-05, "loss": 1.255, "step": 3884 }, { "epoch": 0.5712185260062489, "grad_norm": 0.808149516582489, "learning_rate": 4.468468468468469e-05, "loss": 0.989, "step": 3885 }, { "epoch": 0.5713655578018747, "grad_norm": 0.6969342231750488, "learning_rate": 4.4644644644644646e-05, "loss": 1.2741, "step": 3886 }, { "epoch": 0.5715125895975005, "grad_norm": 0.5127707123756409, "learning_rate": 4.4604604604604604e-05, "loss": 1.6265, "step": 3887 }, { "epoch": 0.5716596213931263, "grad_norm": 0.5775532126426697, "learning_rate": 4.456456456456457e-05, "loss": 1.0986, "step": 3888 }, { "epoch": 0.5718066531887521, "grad_norm": 0.7441417574882507, "learning_rate": 4.4524524524524525e-05, "loss": 1.3964, "step": 3889 }, { "epoch": 0.5719536849843778, "grad_norm": 0.6807921528816223, "learning_rate": 4.448448448448448e-05, "loss": 1.0375, "step": 3890 }, { "epoch": 0.5721007167800036, "grad_norm": 0.6171905398368835, "learning_rate": 4.4444444444444447e-05, "loss": 1.0825, "step": 3891 }, { "epoch": 0.5722477485756294, "grad_norm": 0.6555778384208679, "learning_rate": 4.4404404404404404e-05, "loss": 1.12, "step": 3892 }, { "epoch": 0.5723947803712552, "grad_norm": 0.6038200259208679, "learning_rate": 4.436436436436437e-05, "loss": 1.1263, "step": 3893 }, { "epoch": 0.572541812166881, "grad_norm": 0.5801321864128113, "learning_rate": 4.432432432432433e-05, "loss": 0.9774, "step": 3894 }, { "epoch": 0.5726888439625069, "grad_norm": 0.6304653286933899, "learning_rate": 4.428428428428429e-05, "loss": 0.9723, "step": 3895 }, { "epoch": 0.5728358757581327, "grad_norm": 0.5931771993637085, "learning_rate": 4.424424424424425e-05, "loss": 1.1068, "step": 3896 }, { "epoch": 0.5729829075537585, "grad_norm": 0.6631065011024475, "learning_rate": 4.420420420420421e-05, "loss": 1.4735, "step": 3897 }, { "epoch": 0.5731299393493843, "grad_norm": 0.4899848997592926, "learning_rate": 4.416416416416417e-05, "loss": 1.2295, "step": 3898 }, { "epoch": 0.5732769711450101, "grad_norm": 0.6712191700935364, "learning_rate": 4.4124124124124126e-05, "loss": 1.1945, "step": 3899 }, { "epoch": 0.5734240029406359, "grad_norm": 0.5065829157829285, "learning_rate": 4.408408408408409e-05, "loss": 1.4563, "step": 3900 }, { "epoch": 0.5735710347362617, "grad_norm": 0.8651532530784607, "learning_rate": 4.404404404404405e-05, "loss": 1.5171, "step": 3901 }, { "epoch": 0.5737180665318875, "grad_norm": 0.686324954032898, "learning_rate": 4.4004004004004004e-05, "loss": 1.1432, "step": 3902 }, { "epoch": 0.5738650983275133, "grad_norm": 0.5401709675788879, "learning_rate": 4.396396396396397e-05, "loss": 1.4161, "step": 3903 }, { "epoch": 0.5740121301231391, "grad_norm": 0.4878500699996948, "learning_rate": 4.3923923923923926e-05, "loss": 0.882, "step": 3904 }, { "epoch": 0.5741591619187649, "grad_norm": 0.6764366030693054, "learning_rate": 4.388388388388388e-05, "loss": 0.9984, "step": 3905 }, { "epoch": 0.5743061937143907, "grad_norm": 0.6165736317634583, "learning_rate": 4.384384384384385e-05, "loss": 1.0293, "step": 3906 }, { "epoch": 0.5744532255100165, "grad_norm": 0.47062137722969055, "learning_rate": 4.3803803803803805e-05, "loss": 1.1129, "step": 3907 }, { "epoch": 0.5746002573056423, "grad_norm": 0.5305542349815369, "learning_rate": 4.376376376376376e-05, "loss": 1.2581, "step": 3908 }, { "epoch": 0.5747472891012682, "grad_norm": 0.4706820845603943, "learning_rate": 4.3723723723723726e-05, "loss": 0.9376, "step": 3909 }, { "epoch": 0.574894320896894, "grad_norm": 0.5048362612724304, "learning_rate": 4.3683683683683684e-05, "loss": 1.7952, "step": 3910 }, { "epoch": 0.5750413526925198, "grad_norm": 0.8111968040466309, "learning_rate": 4.364364364364365e-05, "loss": 1.0982, "step": 3911 }, { "epoch": 0.5751883844881456, "grad_norm": 0.5307667255401611, "learning_rate": 4.3603603603603605e-05, "loss": 1.3133, "step": 3912 }, { "epoch": 0.5753354162837714, "grad_norm": 0.6125577092170715, "learning_rate": 4.356356356356356e-05, "loss": 1.0711, "step": 3913 }, { "epoch": 0.5754824480793972, "grad_norm": 0.845551073551178, "learning_rate": 4.3523523523523527e-05, "loss": 1.0273, "step": 3914 }, { "epoch": 0.575629479875023, "grad_norm": 0.4611373841762543, "learning_rate": 4.3483483483483484e-05, "loss": 0.8662, "step": 3915 }, { "epoch": 0.5757765116706488, "grad_norm": 0.44748732447624207, "learning_rate": 4.344344344344344e-05, "loss": 1.0162, "step": 3916 }, { "epoch": 0.5759235434662746, "grad_norm": 0.5885804891586304, "learning_rate": 4.3403403403403405e-05, "loss": 1.7767, "step": 3917 }, { "epoch": 0.5760705752619004, "grad_norm": 0.4944639801979065, "learning_rate": 4.336336336336336e-05, "loss": 1.0081, "step": 3918 }, { "epoch": 0.5762176070575262, "grad_norm": 0.5588499903678894, "learning_rate": 4.332332332332332e-05, "loss": 1.2734, "step": 3919 }, { "epoch": 0.576364638853152, "grad_norm": 0.4633791148662567, "learning_rate": 4.328328328328329e-05, "loss": 1.2736, "step": 3920 }, { "epoch": 0.5765116706487778, "grad_norm": 0.5876462459564209, "learning_rate": 4.324324324324325e-05, "loss": 1.2143, "step": 3921 }, { "epoch": 0.5766587024444036, "grad_norm": 0.6878694295883179, "learning_rate": 4.3203203203203206e-05, "loss": 1.1142, "step": 3922 }, { "epoch": 0.5768057342400295, "grad_norm": 0.630418062210083, "learning_rate": 4.316316316316317e-05, "loss": 1.2544, "step": 3923 }, { "epoch": 0.5769527660356553, "grad_norm": 0.707551896572113, "learning_rate": 4.312312312312313e-05, "loss": 1.4866, "step": 3924 }, { "epoch": 0.5770997978312811, "grad_norm": 0.6874594688415527, "learning_rate": 4.3083083083083084e-05, "loss": 1.018, "step": 3925 }, { "epoch": 0.5772468296269068, "grad_norm": 0.5306150317192078, "learning_rate": 4.304304304304305e-05, "loss": 1.3161, "step": 3926 }, { "epoch": 0.5773938614225326, "grad_norm": 0.5536537170410156, "learning_rate": 4.3003003003003006e-05, "loss": 1.1916, "step": 3927 }, { "epoch": 0.5775408932181584, "grad_norm": 0.6379367709159851, "learning_rate": 4.296296296296296e-05, "loss": 1.1714, "step": 3928 }, { "epoch": 0.5776879250137842, "grad_norm": 0.6079977750778198, "learning_rate": 4.292292292292293e-05, "loss": 1.286, "step": 3929 }, { "epoch": 0.57783495680941, "grad_norm": 0.6454192399978638, "learning_rate": 4.2882882882882885e-05, "loss": 1.1959, "step": 3930 }, { "epoch": 0.5779819886050358, "grad_norm": 0.688525915145874, "learning_rate": 4.284284284284284e-05, "loss": 0.9797, "step": 3931 }, { "epoch": 0.5781290204006616, "grad_norm": 0.6747182607650757, "learning_rate": 4.2802802802802806e-05, "loss": 1.4006, "step": 3932 }, { "epoch": 0.5782760521962874, "grad_norm": 0.5325867533683777, "learning_rate": 4.2762762762762763e-05, "loss": 1.1092, "step": 3933 }, { "epoch": 0.5784230839919132, "grad_norm": 0.48029419779777527, "learning_rate": 4.272272272272273e-05, "loss": 1.5321, "step": 3934 }, { "epoch": 0.578570115787539, "grad_norm": 0.49250197410583496, "learning_rate": 4.2682682682682685e-05, "loss": 1.051, "step": 3935 }, { "epoch": 0.5787171475831648, "grad_norm": 0.6519591212272644, "learning_rate": 4.264264264264264e-05, "loss": 0.8484, "step": 3936 }, { "epoch": 0.5788641793787906, "grad_norm": 0.6057565212249756, "learning_rate": 4.2602602602602606e-05, "loss": 1.3957, "step": 3937 }, { "epoch": 0.5790112111744165, "grad_norm": 0.6000537276268005, "learning_rate": 4.2562562562562564e-05, "loss": 1.3344, "step": 3938 }, { "epoch": 0.5791582429700423, "grad_norm": 0.4712616205215454, "learning_rate": 4.252252252252252e-05, "loss": 1.2045, "step": 3939 }, { "epoch": 0.5793052747656681, "grad_norm": 0.5122885704040527, "learning_rate": 4.2482482482482485e-05, "loss": 1.5662, "step": 3940 }, { "epoch": 0.5794523065612939, "grad_norm": 0.5452935099601746, "learning_rate": 4.244244244244244e-05, "loss": 0.9994, "step": 3941 }, { "epoch": 0.5795993383569197, "grad_norm": 0.6834075450897217, "learning_rate": 4.24024024024024e-05, "loss": 1.3551, "step": 3942 }, { "epoch": 0.5797463701525455, "grad_norm": 0.7645115852355957, "learning_rate": 4.2362362362362364e-05, "loss": 1.138, "step": 3943 }, { "epoch": 0.5798934019481713, "grad_norm": 0.39694881439208984, "learning_rate": 4.232232232232232e-05, "loss": 1.8378, "step": 3944 }, { "epoch": 0.5800404337437971, "grad_norm": 0.6558786630630493, "learning_rate": 4.228228228228228e-05, "loss": 1.2467, "step": 3945 }, { "epoch": 0.5801874655394229, "grad_norm": 0.4884409010410309, "learning_rate": 4.224224224224225e-05, "loss": 1.384, "step": 3946 }, { "epoch": 0.5803344973350487, "grad_norm": 0.3794085383415222, "learning_rate": 4.220220220220221e-05, "loss": 1.2513, "step": 3947 }, { "epoch": 0.5804815291306745, "grad_norm": 0.5824689865112305, "learning_rate": 4.2162162162162164e-05, "loss": 0.9278, "step": 3948 }, { "epoch": 0.5806285609263003, "grad_norm": 0.5629547834396362, "learning_rate": 4.212212212212213e-05, "loss": 1.2882, "step": 3949 }, { "epoch": 0.5807755927219261, "grad_norm": 0.6141697764396667, "learning_rate": 4.2082082082082086e-05, "loss": 1.4175, "step": 3950 }, { "epoch": 0.5809226245175519, "grad_norm": 0.8441793918609619, "learning_rate": 4.204204204204204e-05, "loss": 1.0311, "step": 3951 }, { "epoch": 0.5810696563131778, "grad_norm": 0.5808511972427368, "learning_rate": 4.200200200200201e-05, "loss": 0.8337, "step": 3952 }, { "epoch": 0.5812166881088036, "grad_norm": 0.6125937700271606, "learning_rate": 4.1961961961961965e-05, "loss": 1.1852, "step": 3953 }, { "epoch": 0.5813637199044294, "grad_norm": 0.6295627951622009, "learning_rate": 4.192192192192192e-05, "loss": 0.7297, "step": 3954 }, { "epoch": 0.5815107517000552, "grad_norm": 0.8200278878211975, "learning_rate": 4.1881881881881886e-05, "loss": 1.3338, "step": 3955 }, { "epoch": 0.581657783495681, "grad_norm": 0.6150320768356323, "learning_rate": 4.1841841841841843e-05, "loss": 0.9272, "step": 3956 }, { "epoch": 0.5818048152913068, "grad_norm": 0.47576406598091125, "learning_rate": 4.180180180180181e-05, "loss": 1.2182, "step": 3957 }, { "epoch": 0.5819518470869326, "grad_norm": 0.504074215888977, "learning_rate": 4.1761761761761765e-05, "loss": 1.334, "step": 3958 }, { "epoch": 0.5820988788825584, "grad_norm": 0.6547872424125671, "learning_rate": 4.172172172172172e-05, "loss": 0.9855, "step": 3959 }, { "epoch": 0.5822459106781842, "grad_norm": 0.5159763097763062, "learning_rate": 4.1681681681681686e-05, "loss": 1.1131, "step": 3960 }, { "epoch": 0.58239294247381, "grad_norm": 0.654640793800354, "learning_rate": 4.1641641641641644e-05, "loss": 1.166, "step": 3961 }, { "epoch": 0.5825399742694357, "grad_norm": 0.5653752684593201, "learning_rate": 4.16016016016016e-05, "loss": 1.0804, "step": 3962 }, { "epoch": 0.5826870060650615, "grad_norm": 0.5739814043045044, "learning_rate": 4.1561561561561565e-05, "loss": 1.3318, "step": 3963 }, { "epoch": 0.5828340378606873, "grad_norm": 0.5872815847396851, "learning_rate": 4.152152152152152e-05, "loss": 0.8635, "step": 3964 }, { "epoch": 0.5829810696563131, "grad_norm": 0.4173577129840851, "learning_rate": 4.148148148148148e-05, "loss": 1.515, "step": 3965 }, { "epoch": 0.5831281014519389, "grad_norm": 0.6691954135894775, "learning_rate": 4.1441441441441444e-05, "loss": 0.9289, "step": 3966 }, { "epoch": 0.5832751332475647, "grad_norm": 0.6520882844924927, "learning_rate": 4.14014014014014e-05, "loss": 1.1152, "step": 3967 }, { "epoch": 0.5834221650431906, "grad_norm": 0.5380585789680481, "learning_rate": 4.136136136136136e-05, "loss": 1.0436, "step": 3968 }, { "epoch": 0.5835691968388164, "grad_norm": 0.4726931154727936, "learning_rate": 4.132132132132132e-05, "loss": 0.8907, "step": 3969 }, { "epoch": 0.5837162286344422, "grad_norm": 0.5997667908668518, "learning_rate": 4.128128128128128e-05, "loss": 1.0318, "step": 3970 }, { "epoch": 0.583863260430068, "grad_norm": 0.7806783318519592, "learning_rate": 4.124124124124124e-05, "loss": 0.9224, "step": 3971 }, { "epoch": 0.5840102922256938, "grad_norm": 0.6415377259254456, "learning_rate": 4.12012012012012e-05, "loss": 1.6375, "step": 3972 }, { "epoch": 0.5841573240213196, "grad_norm": 0.49921607971191406, "learning_rate": 4.1161161161161166e-05, "loss": 1.1261, "step": 3973 }, { "epoch": 0.5843043558169454, "grad_norm": 0.7759469747543335, "learning_rate": 4.112112112112112e-05, "loss": 0.9358, "step": 3974 }, { "epoch": 0.5844513876125712, "grad_norm": 0.5076806545257568, "learning_rate": 4.108108108108109e-05, "loss": 0.9555, "step": 3975 }, { "epoch": 0.584598419408197, "grad_norm": 0.5856360197067261, "learning_rate": 4.1041041041041045e-05, "loss": 1.6047, "step": 3976 }, { "epoch": 0.5847454512038228, "grad_norm": 0.5572112798690796, "learning_rate": 4.1001001001001e-05, "loss": 1.1799, "step": 3977 }, { "epoch": 0.5848924829994486, "grad_norm": 0.4203810393810272, "learning_rate": 4.0960960960960966e-05, "loss": 1.2833, "step": 3978 }, { "epoch": 0.5850395147950744, "grad_norm": 0.5821952819824219, "learning_rate": 4.092092092092092e-05, "loss": 1.0626, "step": 3979 }, { "epoch": 0.5851865465907002, "grad_norm": 0.5185145735740662, "learning_rate": 4.088088088088089e-05, "loss": 1.2602, "step": 3980 }, { "epoch": 0.585333578386326, "grad_norm": 0.48793739080429077, "learning_rate": 4.0840840840840845e-05, "loss": 1.6845, "step": 3981 }, { "epoch": 0.5854806101819519, "grad_norm": 0.4924751818180084, "learning_rate": 4.08008008008008e-05, "loss": 1.7381, "step": 3982 }, { "epoch": 0.5856276419775777, "grad_norm": 0.5601341724395752, "learning_rate": 4.0760760760760766e-05, "loss": 1.3726, "step": 3983 }, { "epoch": 0.5857746737732035, "grad_norm": 0.5795567631721497, "learning_rate": 4.0720720720720724e-05, "loss": 1.1993, "step": 3984 }, { "epoch": 0.5859217055688293, "grad_norm": 0.5444211959838867, "learning_rate": 4.068068068068068e-05, "loss": 1.0559, "step": 3985 }, { "epoch": 0.5860687373644551, "grad_norm": 0.49179282784461975, "learning_rate": 4.0640640640640645e-05, "loss": 1.0922, "step": 3986 }, { "epoch": 0.5862157691600809, "grad_norm": 0.8663086295127869, "learning_rate": 4.06006006006006e-05, "loss": 0.8542, "step": 3987 }, { "epoch": 0.5863628009557067, "grad_norm": 0.4640100598335266, "learning_rate": 4.056056056056056e-05, "loss": 1.0668, "step": 3988 }, { "epoch": 0.5865098327513325, "grad_norm": 0.5435771942138672, "learning_rate": 4.0520520520520524e-05, "loss": 1.2801, "step": 3989 }, { "epoch": 0.5866568645469583, "grad_norm": 0.6174678206443787, "learning_rate": 4.048048048048048e-05, "loss": 1.0414, "step": 3990 }, { "epoch": 0.5868038963425841, "grad_norm": 0.7196846008300781, "learning_rate": 4.044044044044044e-05, "loss": 0.9815, "step": 3991 }, { "epoch": 0.5869509281382099, "grad_norm": 0.5916906595230103, "learning_rate": 4.04004004004004e-05, "loss": 1.1245, "step": 3992 }, { "epoch": 0.5870979599338357, "grad_norm": 0.7911782264709473, "learning_rate": 4.036036036036036e-05, "loss": 1.5507, "step": 3993 }, { "epoch": 0.5872449917294615, "grad_norm": 0.6522746682167053, "learning_rate": 4.032032032032032e-05, "loss": 1.2236, "step": 3994 }, { "epoch": 0.5873920235250873, "grad_norm": 0.478255957365036, "learning_rate": 4.028028028028028e-05, "loss": 1.9246, "step": 3995 }, { "epoch": 0.5875390553207132, "grad_norm": 0.7348905205726624, "learning_rate": 4.024024024024024e-05, "loss": 0.9499, "step": 3996 }, { "epoch": 0.587686087116339, "grad_norm": 0.613096296787262, "learning_rate": 4.02002002002002e-05, "loss": 1.3049, "step": 3997 }, { "epoch": 0.5878331189119647, "grad_norm": 0.6269247531890869, "learning_rate": 4.016016016016016e-05, "loss": 1.0006, "step": 3998 }, { "epoch": 0.5879801507075905, "grad_norm": 0.6456705927848816, "learning_rate": 4.012012012012012e-05, "loss": 1.5161, "step": 3999 }, { "epoch": 0.5881271825032163, "grad_norm": 0.6287373304367065, "learning_rate": 4.008008008008008e-05, "loss": 1.133, "step": 4000 }, { "epoch": 0.5882742142988421, "grad_norm": 0.5247694253921509, "learning_rate": 4.0040040040040046e-05, "loss": 1.2938, "step": 4001 }, { "epoch": 0.5884212460944679, "grad_norm": 0.5692188739776611, "learning_rate": 4e-05, "loss": 1.0454, "step": 4002 }, { "epoch": 0.5885682778900937, "grad_norm": 0.5211926102638245, "learning_rate": 3.995995995995996e-05, "loss": 0.919, "step": 4003 }, { "epoch": 0.5887153096857195, "grad_norm": 0.51409512758255, "learning_rate": 3.9919919919919925e-05, "loss": 1.1403, "step": 4004 }, { "epoch": 0.5888623414813453, "grad_norm": 0.3548368215560913, "learning_rate": 3.987987987987988e-05, "loss": 1.526, "step": 4005 }, { "epoch": 0.5890093732769711, "grad_norm": 0.6359329223632812, "learning_rate": 3.9839839839839846e-05, "loss": 1.0368, "step": 4006 }, { "epoch": 0.5891564050725969, "grad_norm": 0.48642832040786743, "learning_rate": 3.9799799799799804e-05, "loss": 1.371, "step": 4007 }, { "epoch": 0.5893034368682227, "grad_norm": 0.6868888139724731, "learning_rate": 3.975975975975976e-05, "loss": 1.0302, "step": 4008 }, { "epoch": 0.5894504686638485, "grad_norm": 0.664439857006073, "learning_rate": 3.9719719719719725e-05, "loss": 1.176, "step": 4009 }, { "epoch": 0.5895975004594743, "grad_norm": 0.5817912817001343, "learning_rate": 3.967967967967968e-05, "loss": 1.1273, "step": 4010 }, { "epoch": 0.5897445322551002, "grad_norm": 0.5843328237533569, "learning_rate": 3.963963963963964e-05, "loss": 1.2136, "step": 4011 }, { "epoch": 0.589891564050726, "grad_norm": 0.5242504477500916, "learning_rate": 3.9599599599599604e-05, "loss": 1.4356, "step": 4012 }, { "epoch": 0.5900385958463518, "grad_norm": 0.514683723449707, "learning_rate": 3.955955955955956e-05, "loss": 1.7334, "step": 4013 }, { "epoch": 0.5901856276419776, "grad_norm": 0.5664594769477844, "learning_rate": 3.951951951951952e-05, "loss": 1.1484, "step": 4014 }, { "epoch": 0.5903326594376034, "grad_norm": 0.6937956809997559, "learning_rate": 3.947947947947948e-05, "loss": 1.1479, "step": 4015 }, { "epoch": 0.5904796912332292, "grad_norm": 0.6080839037895203, "learning_rate": 3.943943943943944e-05, "loss": 1.2295, "step": 4016 }, { "epoch": 0.590626723028855, "grad_norm": 0.46640217304229736, "learning_rate": 3.93993993993994e-05, "loss": 0.9021, "step": 4017 }, { "epoch": 0.5907737548244808, "grad_norm": 0.6322725415229797, "learning_rate": 3.935935935935936e-05, "loss": 1.1639, "step": 4018 }, { "epoch": 0.5909207866201066, "grad_norm": 0.7618115544319153, "learning_rate": 3.931931931931932e-05, "loss": 1.1317, "step": 4019 }, { "epoch": 0.5910678184157324, "grad_norm": 0.44221118092536926, "learning_rate": 3.927927927927928e-05, "loss": 1.0672, "step": 4020 }, { "epoch": 0.5912148502113582, "grad_norm": 0.6903846263885498, "learning_rate": 3.923923923923924e-05, "loss": 1.1592, "step": 4021 }, { "epoch": 0.591361882006984, "grad_norm": 0.6488900780677795, "learning_rate": 3.91991991991992e-05, "loss": 0.8991, "step": 4022 }, { "epoch": 0.5915089138026098, "grad_norm": 0.6327074766159058, "learning_rate": 3.915915915915916e-05, "loss": 1.1307, "step": 4023 }, { "epoch": 0.5916559455982356, "grad_norm": 0.709010660648346, "learning_rate": 3.911911911911912e-05, "loss": 1.0593, "step": 4024 }, { "epoch": 0.5918029773938615, "grad_norm": 0.515105128288269, "learning_rate": 3.9079079079079076e-05, "loss": 1.5156, "step": 4025 }, { "epoch": 0.5919500091894873, "grad_norm": 0.505111038684845, "learning_rate": 3.903903903903904e-05, "loss": 1.1686, "step": 4026 }, { "epoch": 0.5920970409851131, "grad_norm": 0.5735710859298706, "learning_rate": 3.8998998998999005e-05, "loss": 1.0011, "step": 4027 }, { "epoch": 0.5922440727807389, "grad_norm": 0.49664515256881714, "learning_rate": 3.895895895895896e-05, "loss": 1.6333, "step": 4028 }, { "epoch": 0.5923911045763647, "grad_norm": 0.5376994609832764, "learning_rate": 3.8918918918918926e-05, "loss": 1.22, "step": 4029 }, { "epoch": 0.5925381363719905, "grad_norm": 0.6213135123252869, "learning_rate": 3.8878878878878883e-05, "loss": 1.0126, "step": 4030 }, { "epoch": 0.5926851681676163, "grad_norm": 0.6643821001052856, "learning_rate": 3.883883883883884e-05, "loss": 1.0855, "step": 4031 }, { "epoch": 0.5928321999632421, "grad_norm": 0.5266476273536682, "learning_rate": 3.8798798798798805e-05, "loss": 0.9659, "step": 4032 }, { "epoch": 0.5929792317588679, "grad_norm": 0.45445355772972107, "learning_rate": 3.875875875875876e-05, "loss": 1.3784, "step": 4033 }, { "epoch": 0.5931262635544936, "grad_norm": 0.7232778072357178, "learning_rate": 3.871871871871872e-05, "loss": 1.1369, "step": 4034 }, { "epoch": 0.5932732953501194, "grad_norm": 0.5679019093513489, "learning_rate": 3.8678678678678684e-05, "loss": 1.0697, "step": 4035 }, { "epoch": 0.5934203271457452, "grad_norm": 0.6531158685684204, "learning_rate": 3.863863863863864e-05, "loss": 1.0034, "step": 4036 }, { "epoch": 0.593567358941371, "grad_norm": 0.6739774942398071, "learning_rate": 3.85985985985986e-05, "loss": 1.2221, "step": 4037 }, { "epoch": 0.5937143907369968, "grad_norm": 0.6018474698066711, "learning_rate": 3.855855855855856e-05, "loss": 0.9834, "step": 4038 }, { "epoch": 0.5938614225326226, "grad_norm": 0.7331787943840027, "learning_rate": 3.851851851851852e-05, "loss": 1.0343, "step": 4039 }, { "epoch": 0.5940084543282484, "grad_norm": 0.6017476320266724, "learning_rate": 3.847847847847848e-05, "loss": 1.0765, "step": 4040 }, { "epoch": 0.5941554861238743, "grad_norm": 0.7441921234130859, "learning_rate": 3.843843843843844e-05, "loss": 1.1737, "step": 4041 }, { "epoch": 0.5943025179195001, "grad_norm": 0.48707401752471924, "learning_rate": 3.83983983983984e-05, "loss": 1.2085, "step": 4042 }, { "epoch": 0.5944495497151259, "grad_norm": 0.4820218086242676, "learning_rate": 3.8358358358358356e-05, "loss": 1.3798, "step": 4043 }, { "epoch": 0.5945965815107517, "grad_norm": 0.5025620460510254, "learning_rate": 3.831831831831832e-05, "loss": 1.6714, "step": 4044 }, { "epoch": 0.5947436133063775, "grad_norm": 0.5315461158752441, "learning_rate": 3.827827827827828e-05, "loss": 0.9056, "step": 4045 }, { "epoch": 0.5948906451020033, "grad_norm": 0.5431270003318787, "learning_rate": 3.823823823823824e-05, "loss": 1.278, "step": 4046 }, { "epoch": 0.5950376768976291, "grad_norm": 0.6409516334533691, "learning_rate": 3.81981981981982e-05, "loss": 1.2922, "step": 4047 }, { "epoch": 0.5951847086932549, "grad_norm": 0.64371657371521, "learning_rate": 3.8158158158158156e-05, "loss": 1.2073, "step": 4048 }, { "epoch": 0.5953317404888807, "grad_norm": 0.569963276386261, "learning_rate": 3.811811811811812e-05, "loss": 1.0803, "step": 4049 }, { "epoch": 0.5954787722845065, "grad_norm": 0.7044130563735962, "learning_rate": 3.807807807807808e-05, "loss": 1.2379, "step": 4050 }, { "epoch": 0.5956258040801323, "grad_norm": 0.4542842209339142, "learning_rate": 3.8038038038038035e-05, "loss": 1.6173, "step": 4051 }, { "epoch": 0.5957728358757581, "grad_norm": 0.7704740762710571, "learning_rate": 3.7997997997998e-05, "loss": 1.0201, "step": 4052 }, { "epoch": 0.5959198676713839, "grad_norm": 0.5892595648765564, "learning_rate": 3.7957957957957963e-05, "loss": 1.0504, "step": 4053 }, { "epoch": 0.5960668994670097, "grad_norm": 0.4372253715991974, "learning_rate": 3.791791791791792e-05, "loss": 1.2975, "step": 4054 }, { "epoch": 0.5962139312626356, "grad_norm": 0.6836683750152588, "learning_rate": 3.7877877877877885e-05, "loss": 1.1459, "step": 4055 }, { "epoch": 0.5963609630582614, "grad_norm": 0.7291818857192993, "learning_rate": 3.783783783783784e-05, "loss": 1.0313, "step": 4056 }, { "epoch": 0.5965079948538872, "grad_norm": 0.726291835308075, "learning_rate": 3.77977977977978e-05, "loss": 0.9701, "step": 4057 }, { "epoch": 0.596655026649513, "grad_norm": 0.612395703792572, "learning_rate": 3.7757757757757764e-05, "loss": 1.086, "step": 4058 }, { "epoch": 0.5968020584451388, "grad_norm": 0.6365050673484802, "learning_rate": 3.771771771771772e-05, "loss": 1.0326, "step": 4059 }, { "epoch": 0.5969490902407646, "grad_norm": 0.5396293997764587, "learning_rate": 3.767767767767768e-05, "loss": 1.1978, "step": 4060 }, { "epoch": 0.5970961220363904, "grad_norm": 0.6103449463844299, "learning_rate": 3.763763763763764e-05, "loss": 1.4315, "step": 4061 }, { "epoch": 0.5972431538320162, "grad_norm": 0.578234851360321, "learning_rate": 3.75975975975976e-05, "loss": 1.2111, "step": 4062 }, { "epoch": 0.597390185627642, "grad_norm": 0.6634455323219299, "learning_rate": 3.755755755755756e-05, "loss": 1.1485, "step": 4063 }, { "epoch": 0.5975372174232678, "grad_norm": 0.6414049863815308, "learning_rate": 3.751751751751752e-05, "loss": 1.0845, "step": 4064 }, { "epoch": 0.5976842492188936, "grad_norm": 0.6344085335731506, "learning_rate": 3.747747747747748e-05, "loss": 1.623, "step": 4065 }, { "epoch": 0.5978312810145194, "grad_norm": 0.5528027415275574, "learning_rate": 3.7437437437437436e-05, "loss": 0.9752, "step": 4066 }, { "epoch": 0.5979783128101452, "grad_norm": 0.6874452233314514, "learning_rate": 3.73973973973974e-05, "loss": 0.9163, "step": 4067 }, { "epoch": 0.598125344605771, "grad_norm": 0.6430459022521973, "learning_rate": 3.735735735735736e-05, "loss": 1.3873, "step": 4068 }, { "epoch": 0.5982723764013969, "grad_norm": 0.6829120516777039, "learning_rate": 3.731731731731732e-05, "loss": 1.1443, "step": 4069 }, { "epoch": 0.5984194081970226, "grad_norm": 0.645114541053772, "learning_rate": 3.727727727727728e-05, "loss": 1.1006, "step": 4070 }, { "epoch": 0.5985664399926484, "grad_norm": 0.5709945559501648, "learning_rate": 3.7237237237237236e-05, "loss": 1.033, "step": 4071 }, { "epoch": 0.5987134717882742, "grad_norm": 0.564922571182251, "learning_rate": 3.71971971971972e-05, "loss": 1.5887, "step": 4072 }, { "epoch": 0.5988605035839, "grad_norm": 0.45770370960235596, "learning_rate": 3.715715715715716e-05, "loss": 1.2422, "step": 4073 }, { "epoch": 0.5990075353795258, "grad_norm": 0.6072172522544861, "learning_rate": 3.7117117117117115e-05, "loss": 1.361, "step": 4074 }, { "epoch": 0.5991545671751516, "grad_norm": 0.5109969973564148, "learning_rate": 3.707707707707708e-05, "loss": 1.6792, "step": 4075 }, { "epoch": 0.5993015989707774, "grad_norm": 0.6461107134819031, "learning_rate": 3.7037037037037037e-05, "loss": 1.361, "step": 4076 }, { "epoch": 0.5994486307664032, "grad_norm": 0.7485364079475403, "learning_rate": 3.6996996996996994e-05, "loss": 1.1926, "step": 4077 }, { "epoch": 0.599595662562029, "grad_norm": 0.6039025783538818, "learning_rate": 3.695695695695696e-05, "loss": 1.0503, "step": 4078 }, { "epoch": 0.5997426943576548, "grad_norm": 0.4928198456764221, "learning_rate": 3.6916916916916915e-05, "loss": 1.0041, "step": 4079 }, { "epoch": 0.5998897261532806, "grad_norm": 0.6962347626686096, "learning_rate": 3.687687687687688e-05, "loss": 1.1444, "step": 4080 }, { "epoch": 0.6000367579489064, "grad_norm": 0.5655665397644043, "learning_rate": 3.6836836836836844e-05, "loss": 0.8051, "step": 4081 }, { "epoch": 0.6001837897445322, "grad_norm": 0.4273463785648346, "learning_rate": 3.67967967967968e-05, "loss": 1.4584, "step": 4082 }, { "epoch": 0.600330821540158, "grad_norm": 0.6298366189002991, "learning_rate": 3.675675675675676e-05, "loss": 1.2586, "step": 4083 }, { "epoch": 0.6004778533357839, "grad_norm": 0.6722543835639954, "learning_rate": 3.671671671671672e-05, "loss": 1.0463, "step": 4084 }, { "epoch": 0.6006248851314097, "grad_norm": 0.630441427230835, "learning_rate": 3.667667667667668e-05, "loss": 1.1225, "step": 4085 }, { "epoch": 0.6007719169270355, "grad_norm": 0.4380250573158264, "learning_rate": 3.663663663663664e-05, "loss": 1.9328, "step": 4086 }, { "epoch": 0.6009189487226613, "grad_norm": 0.5868902206420898, "learning_rate": 3.65965965965966e-05, "loss": 1.0308, "step": 4087 }, { "epoch": 0.6010659805182871, "grad_norm": 0.7800199389457703, "learning_rate": 3.655655655655656e-05, "loss": 0.8321, "step": 4088 }, { "epoch": 0.6012130123139129, "grad_norm": 0.6868074536323547, "learning_rate": 3.6516516516516516e-05, "loss": 1.2277, "step": 4089 }, { "epoch": 0.6013600441095387, "grad_norm": 0.7668630480766296, "learning_rate": 3.647647647647648e-05, "loss": 1.0672, "step": 4090 }, { "epoch": 0.6015070759051645, "grad_norm": 0.46091166138648987, "learning_rate": 3.643643643643644e-05, "loss": 1.5572, "step": 4091 }, { "epoch": 0.6016541077007903, "grad_norm": 0.6956819295883179, "learning_rate": 3.63963963963964e-05, "loss": 1.305, "step": 4092 }, { "epoch": 0.6018011394964161, "grad_norm": 0.5944724082946777, "learning_rate": 3.635635635635636e-05, "loss": 1.2321, "step": 4093 }, { "epoch": 0.6019481712920419, "grad_norm": 0.5045302510261536, "learning_rate": 3.6316316316316316e-05, "loss": 0.8507, "step": 4094 }, { "epoch": 0.6020952030876677, "grad_norm": 0.5245832800865173, "learning_rate": 3.627627627627628e-05, "loss": 1.0838, "step": 4095 }, { "epoch": 0.6022422348832935, "grad_norm": 0.6375360488891602, "learning_rate": 3.623623623623624e-05, "loss": 1.1522, "step": 4096 }, { "epoch": 0.6023892666789193, "grad_norm": 0.5512775182723999, "learning_rate": 3.6196196196196195e-05, "loss": 1.3372, "step": 4097 }, { "epoch": 0.6025362984745452, "grad_norm": 0.49672284722328186, "learning_rate": 3.615615615615616e-05, "loss": 1.1203, "step": 4098 }, { "epoch": 0.602683330270171, "grad_norm": 0.43224018812179565, "learning_rate": 3.6116116116116116e-05, "loss": 1.4275, "step": 4099 }, { "epoch": 0.6028303620657968, "grad_norm": 0.6499729752540588, "learning_rate": 3.6076076076076074e-05, "loss": 1.1144, "step": 4100 }, { "epoch": 0.6029773938614226, "grad_norm": 0.5130301117897034, "learning_rate": 3.603603603603604e-05, "loss": 1.2863, "step": 4101 }, { "epoch": 0.6031244256570484, "grad_norm": 0.6811477541923523, "learning_rate": 3.5995995995995995e-05, "loss": 1.3042, "step": 4102 }, { "epoch": 0.6032714574526742, "grad_norm": 0.555213987827301, "learning_rate": 3.595595595595595e-05, "loss": 1.4239, "step": 4103 }, { "epoch": 0.6034184892483, "grad_norm": 0.5383201837539673, "learning_rate": 3.591591591591592e-05, "loss": 1.449, "step": 4104 }, { "epoch": 0.6035655210439258, "grad_norm": 0.5409968495368958, "learning_rate": 3.5875875875875874e-05, "loss": 0.8588, "step": 4105 }, { "epoch": 0.6037125528395516, "grad_norm": 0.655749499797821, "learning_rate": 3.583583583583583e-05, "loss": 1.1059, "step": 4106 }, { "epoch": 0.6038595846351773, "grad_norm": 0.8119927644729614, "learning_rate": 3.57957957957958e-05, "loss": 0.9805, "step": 4107 }, { "epoch": 0.6040066164308031, "grad_norm": 0.47233837842941284, "learning_rate": 3.575575575575576e-05, "loss": 1.2925, "step": 4108 }, { "epoch": 0.6041536482264289, "grad_norm": 0.5440821051597595, "learning_rate": 3.571571571571572e-05, "loss": 0.8765, "step": 4109 }, { "epoch": 0.6043006800220547, "grad_norm": 0.47246506810188293, "learning_rate": 3.567567567567568e-05, "loss": 1.3836, "step": 4110 }, { "epoch": 0.6044477118176805, "grad_norm": 0.49478694796562195, "learning_rate": 3.563563563563564e-05, "loss": 1.7512, "step": 4111 }, { "epoch": 0.6045947436133063, "grad_norm": 0.6856399774551392, "learning_rate": 3.5595595595595596e-05, "loss": 1.0257, "step": 4112 }, { "epoch": 0.6047417754089321, "grad_norm": 0.5798173546791077, "learning_rate": 3.555555555555556e-05, "loss": 1.2959, "step": 4113 }, { "epoch": 0.604888807204558, "grad_norm": 0.7735682129859924, "learning_rate": 3.551551551551552e-05, "loss": 1.2891, "step": 4114 }, { "epoch": 0.6050358390001838, "grad_norm": 0.581291675567627, "learning_rate": 3.547547547547548e-05, "loss": 1.0397, "step": 4115 }, { "epoch": 0.6051828707958096, "grad_norm": 0.6539590954780579, "learning_rate": 3.543543543543544e-05, "loss": 1.3912, "step": 4116 }, { "epoch": 0.6053299025914354, "grad_norm": 0.79493647813797, "learning_rate": 3.5395395395395396e-05, "loss": 0.8375, "step": 4117 }, { "epoch": 0.6054769343870612, "grad_norm": 0.6014711260795593, "learning_rate": 3.535535535535536e-05, "loss": 1.2094, "step": 4118 }, { "epoch": 0.605623966182687, "grad_norm": 0.6156975626945496, "learning_rate": 3.531531531531532e-05, "loss": 1.056, "step": 4119 }, { "epoch": 0.6057709979783128, "grad_norm": 0.5600055456161499, "learning_rate": 3.5275275275275275e-05, "loss": 1.5499, "step": 4120 }, { "epoch": 0.6059180297739386, "grad_norm": 0.5225142240524292, "learning_rate": 3.523523523523524e-05, "loss": 1.3047, "step": 4121 }, { "epoch": 0.6060650615695644, "grad_norm": 0.6665456891059875, "learning_rate": 3.5195195195195196e-05, "loss": 1.2103, "step": 4122 }, { "epoch": 0.6062120933651902, "grad_norm": 0.6897240877151489, "learning_rate": 3.5155155155155154e-05, "loss": 1.1014, "step": 4123 }, { "epoch": 0.606359125160816, "grad_norm": 0.6572844982147217, "learning_rate": 3.511511511511512e-05, "loss": 1.277, "step": 4124 }, { "epoch": 0.6065061569564418, "grad_norm": 0.7985073924064636, "learning_rate": 3.5075075075075075e-05, "loss": 0.9315, "step": 4125 }, { "epoch": 0.6066531887520676, "grad_norm": 0.777481734752655, "learning_rate": 3.503503503503503e-05, "loss": 0.9313, "step": 4126 }, { "epoch": 0.6068002205476934, "grad_norm": 0.6872249245643616, "learning_rate": 3.4994994994995e-05, "loss": 1.2402, "step": 4127 }, { "epoch": 0.6069472523433193, "grad_norm": 0.5694342255592346, "learning_rate": 3.4954954954954954e-05, "loss": 1.1546, "step": 4128 }, { "epoch": 0.6070942841389451, "grad_norm": 0.5878894329071045, "learning_rate": 3.491491491491491e-05, "loss": 1.4083, "step": 4129 }, { "epoch": 0.6072413159345709, "grad_norm": 0.6820914149284363, "learning_rate": 3.4874874874874875e-05, "loss": 1.0271, "step": 4130 }, { "epoch": 0.6073883477301967, "grad_norm": 0.576262891292572, "learning_rate": 3.483483483483483e-05, "loss": 0.9992, "step": 4131 }, { "epoch": 0.6075353795258225, "grad_norm": 0.7268515229225159, "learning_rate": 3.47947947947948e-05, "loss": 0.9373, "step": 4132 }, { "epoch": 0.6076824113214483, "grad_norm": 0.4334571957588196, "learning_rate": 3.475475475475476e-05, "loss": 1.039, "step": 4133 }, { "epoch": 0.6078294431170741, "grad_norm": 0.529991626739502, "learning_rate": 3.471471471471472e-05, "loss": 1.096, "step": 4134 }, { "epoch": 0.6079764749126999, "grad_norm": 0.46822240948677063, "learning_rate": 3.4674674674674676e-05, "loss": 1.2395, "step": 4135 }, { "epoch": 0.6081235067083257, "grad_norm": 0.4201137125492096, "learning_rate": 3.463463463463464e-05, "loss": 1.0335, "step": 4136 }, { "epoch": 0.6082705385039515, "grad_norm": 0.6610150933265686, "learning_rate": 3.45945945945946e-05, "loss": 0.9066, "step": 4137 }, { "epoch": 0.6084175702995773, "grad_norm": 0.7922123074531555, "learning_rate": 3.4554554554554555e-05, "loss": 0.8484, "step": 4138 }, { "epoch": 0.6085646020952031, "grad_norm": 0.5686684250831604, "learning_rate": 3.451451451451452e-05, "loss": 1.2497, "step": 4139 }, { "epoch": 0.6087116338908289, "grad_norm": 0.7143505215644836, "learning_rate": 3.4474474474474476e-05, "loss": 1.0606, "step": 4140 }, { "epoch": 0.6088586656864547, "grad_norm": 0.6999115943908691, "learning_rate": 3.443443443443444e-05, "loss": 0.9899, "step": 4141 }, { "epoch": 0.6090056974820806, "grad_norm": 0.6234632134437561, "learning_rate": 3.43943943943944e-05, "loss": 1.0223, "step": 4142 }, { "epoch": 0.6091527292777063, "grad_norm": 0.7282074093818665, "learning_rate": 3.4354354354354355e-05, "loss": 0.8161, "step": 4143 }, { "epoch": 0.6092997610733321, "grad_norm": 0.5394673347473145, "learning_rate": 3.431431431431432e-05, "loss": 1.3071, "step": 4144 }, { "epoch": 0.6094467928689579, "grad_norm": 0.4576806128025055, "learning_rate": 3.4274274274274276e-05, "loss": 1.1481, "step": 4145 }, { "epoch": 0.6095938246645837, "grad_norm": 0.5566396713256836, "learning_rate": 3.4234234234234234e-05, "loss": 1.1755, "step": 4146 }, { "epoch": 0.6097408564602095, "grad_norm": 0.7159025073051453, "learning_rate": 3.41941941941942e-05, "loss": 1.0832, "step": 4147 }, { "epoch": 0.6098878882558353, "grad_norm": 0.45188769698143005, "learning_rate": 3.4154154154154155e-05, "loss": 1.6229, "step": 4148 }, { "epoch": 0.6100349200514611, "grad_norm": 0.5813368558883667, "learning_rate": 3.411411411411411e-05, "loss": 1.2918, "step": 4149 }, { "epoch": 0.6101819518470869, "grad_norm": 0.5412317514419556, "learning_rate": 3.4074074074074077e-05, "loss": 1.4571, "step": 4150 }, { "epoch": 0.6103289836427127, "grad_norm": 0.7718943357467651, "learning_rate": 3.4034034034034034e-05, "loss": 1.2947, "step": 4151 }, { "epoch": 0.6104760154383385, "grad_norm": 0.614150881767273, "learning_rate": 3.399399399399399e-05, "loss": 1.1185, "step": 4152 }, { "epoch": 0.6106230472339643, "grad_norm": 0.7094654440879822, "learning_rate": 3.3953953953953955e-05, "loss": 1.1359, "step": 4153 }, { "epoch": 0.6107700790295901, "grad_norm": 0.669365406036377, "learning_rate": 3.391391391391391e-05, "loss": 1.2154, "step": 4154 }, { "epoch": 0.6109171108252159, "grad_norm": 0.708407998085022, "learning_rate": 3.387387387387388e-05, "loss": 0.8754, "step": 4155 }, { "epoch": 0.6110641426208417, "grad_norm": 0.749497652053833, "learning_rate": 3.3833833833833834e-05, "loss": 1.1317, "step": 4156 }, { "epoch": 0.6112111744164676, "grad_norm": 0.65801602602005, "learning_rate": 3.379379379379379e-05, "loss": 1.1616, "step": 4157 }, { "epoch": 0.6113582062120934, "grad_norm": 0.6862016916275024, "learning_rate": 3.3753753753753756e-05, "loss": 0.978, "step": 4158 }, { "epoch": 0.6115052380077192, "grad_norm": 0.5609137415885925, "learning_rate": 3.371371371371371e-05, "loss": 1.3413, "step": 4159 }, { "epoch": 0.611652269803345, "grad_norm": 0.5177550315856934, "learning_rate": 3.367367367367368e-05, "loss": 1.0029, "step": 4160 }, { "epoch": 0.6117993015989708, "grad_norm": 0.5843351483345032, "learning_rate": 3.3633633633633635e-05, "loss": 1.2447, "step": 4161 }, { "epoch": 0.6119463333945966, "grad_norm": 0.7779857516288757, "learning_rate": 3.35935935935936e-05, "loss": 0.8976, "step": 4162 }, { "epoch": 0.6120933651902224, "grad_norm": 0.4688640534877777, "learning_rate": 3.3553553553553556e-05, "loss": 1.0855, "step": 4163 }, { "epoch": 0.6122403969858482, "grad_norm": 0.4901147782802582, "learning_rate": 3.351351351351352e-05, "loss": 1.6227, "step": 4164 }, { "epoch": 0.612387428781474, "grad_norm": 0.609826922416687, "learning_rate": 3.347347347347348e-05, "loss": 1.0975, "step": 4165 }, { "epoch": 0.6125344605770998, "grad_norm": 0.5604642629623413, "learning_rate": 3.3433433433433435e-05, "loss": 1.0853, "step": 4166 }, { "epoch": 0.6126814923727256, "grad_norm": 0.818107545375824, "learning_rate": 3.33933933933934e-05, "loss": 1.1853, "step": 4167 }, { "epoch": 0.6128285241683514, "grad_norm": 0.6022142171859741, "learning_rate": 3.3353353353353356e-05, "loss": 1.3921, "step": 4168 }, { "epoch": 0.6129755559639772, "grad_norm": 0.6505847573280334, "learning_rate": 3.3313313313313314e-05, "loss": 1.224, "step": 4169 }, { "epoch": 0.613122587759603, "grad_norm": 0.4344868063926697, "learning_rate": 3.327327327327328e-05, "loss": 1.2151, "step": 4170 }, { "epoch": 0.6132696195552289, "grad_norm": 0.5892714262008667, "learning_rate": 3.3233233233233235e-05, "loss": 1.2395, "step": 4171 }, { "epoch": 0.6134166513508547, "grad_norm": 0.5215597152709961, "learning_rate": 3.319319319319319e-05, "loss": 1.2001, "step": 4172 }, { "epoch": 0.6135636831464805, "grad_norm": 0.7077812552452087, "learning_rate": 3.3153153153153157e-05, "loss": 0.9811, "step": 4173 }, { "epoch": 0.6137107149421063, "grad_norm": 0.42936259508132935, "learning_rate": 3.3113113113113114e-05, "loss": 1.2497, "step": 4174 }, { "epoch": 0.6138577467377321, "grad_norm": 0.5679152607917786, "learning_rate": 3.307307307307307e-05, "loss": 1.1455, "step": 4175 }, { "epoch": 0.6140047785333579, "grad_norm": 0.6069644093513489, "learning_rate": 3.3033033033033035e-05, "loss": 1.1516, "step": 4176 }, { "epoch": 0.6141518103289837, "grad_norm": 0.6831707954406738, "learning_rate": 3.299299299299299e-05, "loss": 1.0691, "step": 4177 }, { "epoch": 0.6142988421246095, "grad_norm": 0.6953707337379456, "learning_rate": 3.295295295295296e-05, "loss": 1.2889, "step": 4178 }, { "epoch": 0.6144458739202352, "grad_norm": 0.41600751876831055, "learning_rate": 3.2912912912912914e-05, "loss": 1.4889, "step": 4179 }, { "epoch": 0.614592905715861, "grad_norm": 0.7742583155632019, "learning_rate": 3.287287287287287e-05, "loss": 1.0101, "step": 4180 }, { "epoch": 0.6147399375114868, "grad_norm": 0.5785536170005798, "learning_rate": 3.2832832832832836e-05, "loss": 1.2648, "step": 4181 }, { "epoch": 0.6148869693071126, "grad_norm": 0.5195890069007874, "learning_rate": 3.279279279279279e-05, "loss": 1.0621, "step": 4182 }, { "epoch": 0.6150340011027384, "grad_norm": 0.41763874888420105, "learning_rate": 3.275275275275275e-05, "loss": 1.1332, "step": 4183 }, { "epoch": 0.6151810328983642, "grad_norm": 0.5021741986274719, "learning_rate": 3.2712712712712714e-05, "loss": 1.5735, "step": 4184 }, { "epoch": 0.61532806469399, "grad_norm": 0.6877545118331909, "learning_rate": 3.267267267267267e-05, "loss": 1.2678, "step": 4185 }, { "epoch": 0.6154750964896158, "grad_norm": 0.6416240930557251, "learning_rate": 3.263263263263263e-05, "loss": 1.4609, "step": 4186 }, { "epoch": 0.6156221282852417, "grad_norm": 0.49565497040748596, "learning_rate": 3.25925925925926e-05, "loss": 1.0842, "step": 4187 }, { "epoch": 0.6157691600808675, "grad_norm": 0.607653021812439, "learning_rate": 3.255255255255256e-05, "loss": 1.0442, "step": 4188 }, { "epoch": 0.6159161918764933, "grad_norm": 0.5680317282676697, "learning_rate": 3.2512512512512515e-05, "loss": 0.9726, "step": 4189 }, { "epoch": 0.6160632236721191, "grad_norm": 0.6337454915046692, "learning_rate": 3.247247247247248e-05, "loss": 1.053, "step": 4190 }, { "epoch": 0.6162102554677449, "grad_norm": 0.5967862606048584, "learning_rate": 3.2432432432432436e-05, "loss": 1.1875, "step": 4191 }, { "epoch": 0.6163572872633707, "grad_norm": 0.7857481837272644, "learning_rate": 3.2392392392392394e-05, "loss": 1.0223, "step": 4192 }, { "epoch": 0.6165043190589965, "grad_norm": 0.6474841833114624, "learning_rate": 3.235235235235236e-05, "loss": 1.3101, "step": 4193 }, { "epoch": 0.6166513508546223, "grad_norm": 0.8094004988670349, "learning_rate": 3.2312312312312315e-05, "loss": 1.3547, "step": 4194 }, { "epoch": 0.6167983826502481, "grad_norm": 0.5998041033744812, "learning_rate": 3.227227227227227e-05, "loss": 1.2663, "step": 4195 }, { "epoch": 0.6169454144458739, "grad_norm": 0.6212366223335266, "learning_rate": 3.2232232232232236e-05, "loss": 1.2206, "step": 4196 }, { "epoch": 0.6170924462414997, "grad_norm": 0.482109934091568, "learning_rate": 3.2192192192192194e-05, "loss": 1.1564, "step": 4197 }, { "epoch": 0.6172394780371255, "grad_norm": 0.6368656754493713, "learning_rate": 3.215215215215215e-05, "loss": 1.2002, "step": 4198 }, { "epoch": 0.6173865098327513, "grad_norm": 0.5843809247016907, "learning_rate": 3.2112112112112115e-05, "loss": 0.9551, "step": 4199 }, { "epoch": 0.6175335416283771, "grad_norm": 0.5435611009597778, "learning_rate": 3.207207207207207e-05, "loss": 1.8778, "step": 4200 }, { "epoch": 0.617680573424003, "grad_norm": 0.8950812220573425, "learning_rate": 3.203203203203203e-05, "loss": 1.0485, "step": 4201 }, { "epoch": 0.6178276052196288, "grad_norm": 0.5123649835586548, "learning_rate": 3.1991991991991994e-05, "loss": 0.9742, "step": 4202 }, { "epoch": 0.6179746370152546, "grad_norm": 0.496139794588089, "learning_rate": 3.195195195195195e-05, "loss": 1.0751, "step": 4203 }, { "epoch": 0.6181216688108804, "grad_norm": 0.6886441111564636, "learning_rate": 3.1911911911911916e-05, "loss": 1.073, "step": 4204 }, { "epoch": 0.6182687006065062, "grad_norm": 0.5890141725540161, "learning_rate": 3.187187187187187e-05, "loss": 0.7812, "step": 4205 }, { "epoch": 0.618415732402132, "grad_norm": 0.5376361012458801, "learning_rate": 3.183183183183183e-05, "loss": 1.2949, "step": 4206 }, { "epoch": 0.6185627641977578, "grad_norm": 0.7465544939041138, "learning_rate": 3.1791791791791794e-05, "loss": 1.0578, "step": 4207 }, { "epoch": 0.6187097959933836, "grad_norm": 0.6010075211524963, "learning_rate": 3.175175175175175e-05, "loss": 1.3876, "step": 4208 }, { "epoch": 0.6188568277890094, "grad_norm": 0.6066538691520691, "learning_rate": 3.171171171171171e-05, "loss": 1.6634, "step": 4209 }, { "epoch": 0.6190038595846352, "grad_norm": 0.6352358460426331, "learning_rate": 3.167167167167167e-05, "loss": 1.1451, "step": 4210 }, { "epoch": 0.619150891380261, "grad_norm": 0.4311196506023407, "learning_rate": 3.163163163163163e-05, "loss": 1.1752, "step": 4211 }, { "epoch": 0.6192979231758868, "grad_norm": 0.474315881729126, "learning_rate": 3.159159159159159e-05, "loss": 1.0053, "step": 4212 }, { "epoch": 0.6194449549715126, "grad_norm": 0.5304727554321289, "learning_rate": 3.155155155155155e-05, "loss": 2.1811, "step": 4213 }, { "epoch": 0.6195919867671384, "grad_norm": 0.5246592164039612, "learning_rate": 3.1511511511511516e-05, "loss": 1.1388, "step": 4214 }, { "epoch": 0.6197390185627641, "grad_norm": 0.6717717051506042, "learning_rate": 3.1471471471471473e-05, "loss": 1.2705, "step": 4215 }, { "epoch": 0.61988605035839, "grad_norm": 0.45819154381752014, "learning_rate": 3.143143143143144e-05, "loss": 1.2206, "step": 4216 }, { "epoch": 0.6200330821540158, "grad_norm": 0.5067896842956543, "learning_rate": 3.1391391391391395e-05, "loss": 1.6991, "step": 4217 }, { "epoch": 0.6201801139496416, "grad_norm": 0.8164172172546387, "learning_rate": 3.135135135135135e-05, "loss": 1.0841, "step": 4218 }, { "epoch": 0.6203271457452674, "grad_norm": 0.6701242327690125, "learning_rate": 3.1311311311311316e-05, "loss": 0.9923, "step": 4219 }, { "epoch": 0.6204741775408932, "grad_norm": 0.4663488566875458, "learning_rate": 3.1271271271271274e-05, "loss": 1.5285, "step": 4220 }, { "epoch": 0.620621209336519, "grad_norm": 0.4589792490005493, "learning_rate": 3.123123123123123e-05, "loss": 1.5119, "step": 4221 }, { "epoch": 0.6207682411321448, "grad_norm": 0.8012060523033142, "learning_rate": 3.1191191191191195e-05, "loss": 0.9258, "step": 4222 }, { "epoch": 0.6209152729277706, "grad_norm": 0.6686880588531494, "learning_rate": 3.115115115115115e-05, "loss": 1.2182, "step": 4223 }, { "epoch": 0.6210623047233964, "grad_norm": 0.5338221788406372, "learning_rate": 3.111111111111111e-05, "loss": 0.8283, "step": 4224 }, { "epoch": 0.6212093365190222, "grad_norm": 0.6295833587646484, "learning_rate": 3.1071071071071074e-05, "loss": 1.0503, "step": 4225 }, { "epoch": 0.621356368314648, "grad_norm": 1.0890929698944092, "learning_rate": 3.103103103103103e-05, "loss": 0.8986, "step": 4226 }, { "epoch": 0.6215034001102738, "grad_norm": 0.5466305613517761, "learning_rate": 3.0990990990990995e-05, "loss": 1.0374, "step": 4227 }, { "epoch": 0.6216504319058996, "grad_norm": 0.5889538526535034, "learning_rate": 3.095095095095095e-05, "loss": 1.0741, "step": 4228 }, { "epoch": 0.6217974637015254, "grad_norm": 0.5809684991836548, "learning_rate": 3.091091091091091e-05, "loss": 1.2546, "step": 4229 }, { "epoch": 0.6219444954971513, "grad_norm": 0.635016679763794, "learning_rate": 3.0870870870870874e-05, "loss": 1.0829, "step": 4230 }, { "epoch": 0.6220915272927771, "grad_norm": 0.6403592228889465, "learning_rate": 3.083083083083083e-05, "loss": 0.974, "step": 4231 }, { "epoch": 0.6222385590884029, "grad_norm": 0.461317777633667, "learning_rate": 3.079079079079079e-05, "loss": 1.6315, "step": 4232 }, { "epoch": 0.6223855908840287, "grad_norm": 0.5779048204421997, "learning_rate": 3.075075075075075e-05, "loss": 0.937, "step": 4233 }, { "epoch": 0.6225326226796545, "grad_norm": 0.6516251564025879, "learning_rate": 3.071071071071071e-05, "loss": 1.1669, "step": 4234 }, { "epoch": 0.6226796544752803, "grad_norm": 0.7742251753807068, "learning_rate": 3.067067067067067e-05, "loss": 1.0459, "step": 4235 }, { "epoch": 0.6228266862709061, "grad_norm": 0.555911660194397, "learning_rate": 3.063063063063063e-05, "loss": 1.4197, "step": 4236 }, { "epoch": 0.6229737180665319, "grad_norm": 0.4766096770763397, "learning_rate": 3.059059059059059e-05, "loss": 1.0913, "step": 4237 }, { "epoch": 0.6231207498621577, "grad_norm": 0.600379228591919, "learning_rate": 3.0550550550550547e-05, "loss": 1.2792, "step": 4238 }, { "epoch": 0.6232677816577835, "grad_norm": 0.672394335269928, "learning_rate": 3.051051051051051e-05, "loss": 1.2588, "step": 4239 }, { "epoch": 0.6234148134534093, "grad_norm": 0.6255964636802673, "learning_rate": 3.0470470470470475e-05, "loss": 1.1825, "step": 4240 }, { "epoch": 0.6235618452490351, "grad_norm": 0.5753094553947449, "learning_rate": 3.0430430430430436e-05, "loss": 0.7937, "step": 4241 }, { "epoch": 0.6237088770446609, "grad_norm": 0.5839491486549377, "learning_rate": 3.0390390390390393e-05, "loss": 1.0949, "step": 4242 }, { "epoch": 0.6238559088402867, "grad_norm": 0.47187697887420654, "learning_rate": 3.0350350350350354e-05, "loss": 1.1564, "step": 4243 }, { "epoch": 0.6240029406359126, "grad_norm": 0.5965775847434998, "learning_rate": 3.0310310310310314e-05, "loss": 1.1665, "step": 4244 }, { "epoch": 0.6241499724315384, "grad_norm": 0.7833840250968933, "learning_rate": 3.0270270270270272e-05, "loss": 0.9917, "step": 4245 }, { "epoch": 0.6242970042271642, "grad_norm": 0.7390690445899963, "learning_rate": 3.0230230230230232e-05, "loss": 1.0113, "step": 4246 }, { "epoch": 0.62444403602279, "grad_norm": 0.7212024331092834, "learning_rate": 3.0190190190190193e-05, "loss": 1.0608, "step": 4247 }, { "epoch": 0.6245910678184158, "grad_norm": 0.9431434273719788, "learning_rate": 3.0150150150150154e-05, "loss": 1.4332, "step": 4248 }, { "epoch": 0.6247380996140416, "grad_norm": 0.40740448236465454, "learning_rate": 3.011011011011011e-05, "loss": 1.3877, "step": 4249 }, { "epoch": 0.6248851314096674, "grad_norm": 0.6397035717964172, "learning_rate": 3.0070070070070072e-05, "loss": 1.291, "step": 4250 }, { "epoch": 0.6250321632052931, "grad_norm": 0.6404983401298523, "learning_rate": 3.0030030030030033e-05, "loss": 1.2003, "step": 4251 }, { "epoch": 0.6251791950009189, "grad_norm": 0.6021907925605774, "learning_rate": 2.998998998998999e-05, "loss": 0.8779, "step": 4252 }, { "epoch": 0.6253262267965447, "grad_norm": 0.5747865438461304, "learning_rate": 2.994994994994995e-05, "loss": 1.2857, "step": 4253 }, { "epoch": 0.6254732585921705, "grad_norm": 0.5759550333023071, "learning_rate": 2.990990990990991e-05, "loss": 1.3588, "step": 4254 }, { "epoch": 0.6256202903877963, "grad_norm": 0.6308997273445129, "learning_rate": 2.9869869869869872e-05, "loss": 1.1553, "step": 4255 }, { "epoch": 0.6257673221834221, "grad_norm": 0.48900341987609863, "learning_rate": 2.982982982982983e-05, "loss": 1.0809, "step": 4256 }, { "epoch": 0.6259143539790479, "grad_norm": 0.7460419535636902, "learning_rate": 2.978978978978979e-05, "loss": 1.2569, "step": 4257 }, { "epoch": 0.6260613857746737, "grad_norm": 0.46737247705459595, "learning_rate": 2.974974974974975e-05, "loss": 1.4042, "step": 4258 }, { "epoch": 0.6262084175702995, "grad_norm": 0.582374632358551, "learning_rate": 2.970970970970971e-05, "loss": 1.5074, "step": 4259 }, { "epoch": 0.6263554493659254, "grad_norm": 0.6093973517417908, "learning_rate": 2.966966966966967e-05, "loss": 0.8969, "step": 4260 }, { "epoch": 0.6265024811615512, "grad_norm": 0.8083664774894714, "learning_rate": 2.962962962962963e-05, "loss": 1.0365, "step": 4261 }, { "epoch": 0.626649512957177, "grad_norm": 0.6138285398483276, "learning_rate": 2.958958958958959e-05, "loss": 1.2069, "step": 4262 }, { "epoch": 0.6267965447528028, "grad_norm": 0.575908362865448, "learning_rate": 2.9549549549549548e-05, "loss": 1.5531, "step": 4263 }, { "epoch": 0.6269435765484286, "grad_norm": 0.6096529960632324, "learning_rate": 2.950950950950951e-05, "loss": 1.4008, "step": 4264 }, { "epoch": 0.6270906083440544, "grad_norm": 0.7096732258796692, "learning_rate": 2.946946946946947e-05, "loss": 0.8042, "step": 4265 }, { "epoch": 0.6272376401396802, "grad_norm": 1.0051008462905884, "learning_rate": 2.9429429429429427e-05, "loss": 0.8972, "step": 4266 }, { "epoch": 0.627384671935306, "grad_norm": 0.6008688807487488, "learning_rate": 2.9389389389389394e-05, "loss": 1.1666, "step": 4267 }, { "epoch": 0.6275317037309318, "grad_norm": 0.7297758460044861, "learning_rate": 2.934934934934935e-05, "loss": 1.0004, "step": 4268 }, { "epoch": 0.6276787355265576, "grad_norm": 0.47648367285728455, "learning_rate": 2.9309309309309312e-05, "loss": 1.0502, "step": 4269 }, { "epoch": 0.6278257673221834, "grad_norm": 0.7368243932723999, "learning_rate": 2.9269269269269273e-05, "loss": 1.0649, "step": 4270 }, { "epoch": 0.6279727991178092, "grad_norm": 0.6183923482894897, "learning_rate": 2.9229229229229234e-05, "loss": 1.0824, "step": 4271 }, { "epoch": 0.628119830913435, "grad_norm": 0.5836455225944519, "learning_rate": 2.918918918918919e-05, "loss": 1.2787, "step": 4272 }, { "epoch": 0.6282668627090608, "grad_norm": 0.8230308294296265, "learning_rate": 2.9149149149149152e-05, "loss": 0.8126, "step": 4273 }, { "epoch": 0.6284138945046867, "grad_norm": 0.75684654712677, "learning_rate": 2.9109109109109113e-05, "loss": 1.1892, "step": 4274 }, { "epoch": 0.6285609263003125, "grad_norm": 0.5226010084152222, "learning_rate": 2.906906906906907e-05, "loss": 0.9271, "step": 4275 }, { "epoch": 0.6287079580959383, "grad_norm": 0.5828632712364197, "learning_rate": 2.902902902902903e-05, "loss": 1.2874, "step": 4276 }, { "epoch": 0.6288549898915641, "grad_norm": 0.6638287305831909, "learning_rate": 2.898898898898899e-05, "loss": 1.1696, "step": 4277 }, { "epoch": 0.6290020216871899, "grad_norm": 0.7130815386772156, "learning_rate": 2.8948948948948952e-05, "loss": 1.2645, "step": 4278 }, { "epoch": 0.6291490534828157, "grad_norm": 0.5322955250740051, "learning_rate": 2.890890890890891e-05, "loss": 1.4332, "step": 4279 }, { "epoch": 0.6292960852784415, "grad_norm": 0.5782533288002014, "learning_rate": 2.886886886886887e-05, "loss": 1.0552, "step": 4280 }, { "epoch": 0.6294431170740673, "grad_norm": 0.5212031602859497, "learning_rate": 2.882882882882883e-05, "loss": 0.889, "step": 4281 }, { "epoch": 0.6295901488696931, "grad_norm": 0.6697284579277039, "learning_rate": 2.878878878878879e-05, "loss": 1.2393, "step": 4282 }, { "epoch": 0.6297371806653189, "grad_norm": 0.6215652823448181, "learning_rate": 2.874874874874875e-05, "loss": 1.1652, "step": 4283 }, { "epoch": 0.6298842124609447, "grad_norm": 0.8173681497573853, "learning_rate": 2.870870870870871e-05, "loss": 0.8187, "step": 4284 }, { "epoch": 0.6300312442565705, "grad_norm": 0.6676235795021057, "learning_rate": 2.866866866866867e-05, "loss": 1.0015, "step": 4285 }, { "epoch": 0.6301782760521963, "grad_norm": 0.671282947063446, "learning_rate": 2.8628628628628628e-05, "loss": 1.355, "step": 4286 }, { "epoch": 0.630325307847822, "grad_norm": 0.7453750967979431, "learning_rate": 2.858858858858859e-05, "loss": 0.7784, "step": 4287 }, { "epoch": 0.6304723396434478, "grad_norm": 0.5771030187606812, "learning_rate": 2.854854854854855e-05, "loss": 1.1999, "step": 4288 }, { "epoch": 0.6306193714390737, "grad_norm": 0.44703415036201477, "learning_rate": 2.8508508508508507e-05, "loss": 1.5939, "step": 4289 }, { "epoch": 0.6307664032346995, "grad_norm": 0.590477466583252, "learning_rate": 2.8468468468468467e-05, "loss": 1.4734, "step": 4290 }, { "epoch": 0.6309134350303253, "grad_norm": 0.7566031217575073, "learning_rate": 2.8428428428428428e-05, "loss": 0.8469, "step": 4291 }, { "epoch": 0.6310604668259511, "grad_norm": 0.5553813576698303, "learning_rate": 2.8388388388388386e-05, "loss": 1.284, "step": 4292 }, { "epoch": 0.6312074986215769, "grad_norm": 0.5277453660964966, "learning_rate": 2.8348348348348346e-05, "loss": 1.0969, "step": 4293 }, { "epoch": 0.6313545304172027, "grad_norm": 0.6283857226371765, "learning_rate": 2.8308308308308314e-05, "loss": 1.3468, "step": 4294 }, { "epoch": 0.6315015622128285, "grad_norm": 0.6867744326591492, "learning_rate": 2.826826826826827e-05, "loss": 1.1451, "step": 4295 }, { "epoch": 0.6316485940084543, "grad_norm": 0.48861682415008545, "learning_rate": 2.8228228228228232e-05, "loss": 0.9022, "step": 4296 }, { "epoch": 0.6317956258040801, "grad_norm": 0.7478650808334351, "learning_rate": 2.8188188188188193e-05, "loss": 1.0382, "step": 4297 }, { "epoch": 0.6319426575997059, "grad_norm": 0.6483995318412781, "learning_rate": 2.814814814814815e-05, "loss": 1.1174, "step": 4298 }, { "epoch": 0.6320896893953317, "grad_norm": 0.7797597050666809, "learning_rate": 2.810810810810811e-05, "loss": 0.7388, "step": 4299 }, { "epoch": 0.6322367211909575, "grad_norm": 0.318663090467453, "learning_rate": 2.806806806806807e-05, "loss": 0.556, "step": 4300 }, { "epoch": 0.6323837529865833, "grad_norm": 0.6031324863433838, "learning_rate": 2.8028028028028032e-05, "loss": 0.898, "step": 4301 }, { "epoch": 0.6325307847822091, "grad_norm": 0.5733492970466614, "learning_rate": 2.798798798798799e-05, "loss": 0.8748, "step": 4302 }, { "epoch": 0.632677816577835, "grad_norm": 0.6218776702880859, "learning_rate": 2.794794794794795e-05, "loss": 1.0071, "step": 4303 }, { "epoch": 0.6328248483734608, "grad_norm": 0.846297025680542, "learning_rate": 2.790790790790791e-05, "loss": 1.0437, "step": 4304 }, { "epoch": 0.6329718801690866, "grad_norm": 0.4867134988307953, "learning_rate": 2.786786786786787e-05, "loss": 1.4971, "step": 4305 }, { "epoch": 0.6331189119647124, "grad_norm": 0.5139868855476379, "learning_rate": 2.782782782782783e-05, "loss": 0.874, "step": 4306 }, { "epoch": 0.6332659437603382, "grad_norm": 0.7467309832572937, "learning_rate": 2.778778778778779e-05, "loss": 1.2678, "step": 4307 }, { "epoch": 0.633412975555964, "grad_norm": 0.6350986957550049, "learning_rate": 2.7747747747747747e-05, "loss": 1.11, "step": 4308 }, { "epoch": 0.6335600073515898, "grad_norm": 0.6554126143455505, "learning_rate": 2.7707707707707708e-05, "loss": 1.2752, "step": 4309 }, { "epoch": 0.6337070391472156, "grad_norm": 0.666924238204956, "learning_rate": 2.766766766766767e-05, "loss": 0.8295, "step": 4310 }, { "epoch": 0.6338540709428414, "grad_norm": 0.5755051374435425, "learning_rate": 2.762762762762763e-05, "loss": 1.5989, "step": 4311 }, { "epoch": 0.6340011027384672, "grad_norm": 0.5991459488868713, "learning_rate": 2.7587587587587587e-05, "loss": 1.3514, "step": 4312 }, { "epoch": 0.634148134534093, "grad_norm": 0.6886484622955322, "learning_rate": 2.7547547547547547e-05, "loss": 1.1742, "step": 4313 }, { "epoch": 0.6342951663297188, "grad_norm": 0.5064861178398132, "learning_rate": 2.7507507507507508e-05, "loss": 1.0135, "step": 4314 }, { "epoch": 0.6344421981253446, "grad_norm": 0.5612664818763733, "learning_rate": 2.7467467467467465e-05, "loss": 1.3868, "step": 4315 }, { "epoch": 0.6345892299209704, "grad_norm": 0.9957058429718018, "learning_rate": 2.7427427427427426e-05, "loss": 0.9281, "step": 4316 }, { "epoch": 0.6347362617165963, "grad_norm": 0.7128334045410156, "learning_rate": 2.7387387387387387e-05, "loss": 1.1332, "step": 4317 }, { "epoch": 0.6348832935122221, "grad_norm": 0.48089802265167236, "learning_rate": 2.7347347347347348e-05, "loss": 1.4667, "step": 4318 }, { "epoch": 0.6350303253078479, "grad_norm": 0.7080612778663635, "learning_rate": 2.7307307307307305e-05, "loss": 1.1805, "step": 4319 }, { "epoch": 0.6351773571034737, "grad_norm": 0.5868220329284668, "learning_rate": 2.7267267267267273e-05, "loss": 0.9821, "step": 4320 }, { "epoch": 0.6353243888990995, "grad_norm": 0.63236004114151, "learning_rate": 2.722722722722723e-05, "loss": 1.2489, "step": 4321 }, { "epoch": 0.6354714206947253, "grad_norm": 0.7023555636405945, "learning_rate": 2.718718718718719e-05, "loss": 1.1169, "step": 4322 }, { "epoch": 0.6356184524903511, "grad_norm": 0.7319645285606384, "learning_rate": 2.714714714714715e-05, "loss": 1.1206, "step": 4323 }, { "epoch": 0.6357654842859768, "grad_norm": 0.44233444333076477, "learning_rate": 2.710710710710711e-05, "loss": 1.1017, "step": 4324 }, { "epoch": 0.6359125160816026, "grad_norm": 0.7265629172325134, "learning_rate": 2.706706706706707e-05, "loss": 1.094, "step": 4325 }, { "epoch": 0.6360595478772284, "grad_norm": 0.5348374843597412, "learning_rate": 2.702702702702703e-05, "loss": 1.0908, "step": 4326 }, { "epoch": 0.6362065796728542, "grad_norm": 0.7670673131942749, "learning_rate": 2.698698698698699e-05, "loss": 1.0168, "step": 4327 }, { "epoch": 0.63635361146848, "grad_norm": 0.6110140681266785, "learning_rate": 2.6946946946946948e-05, "loss": 1.3835, "step": 4328 }, { "epoch": 0.6365006432641058, "grad_norm": 0.5442315936088562, "learning_rate": 2.690690690690691e-05, "loss": 0.933, "step": 4329 }, { "epoch": 0.6366476750597316, "grad_norm": 0.6536117792129517, "learning_rate": 2.686686686686687e-05, "loss": 1.0519, "step": 4330 }, { "epoch": 0.6367947068553574, "grad_norm": 0.436906099319458, "learning_rate": 2.6826826826826827e-05, "loss": 1.147, "step": 4331 }, { "epoch": 0.6369417386509832, "grad_norm": 0.6214673519134521, "learning_rate": 2.6786786786786788e-05, "loss": 1.1796, "step": 4332 }, { "epoch": 0.637088770446609, "grad_norm": 0.5310282707214355, "learning_rate": 2.674674674674675e-05, "loss": 0.9727, "step": 4333 }, { "epoch": 0.6372358022422349, "grad_norm": 0.5637466311454773, "learning_rate": 2.670670670670671e-05, "loss": 1.2216, "step": 4334 }, { "epoch": 0.6373828340378607, "grad_norm": 0.6221277713775635, "learning_rate": 2.6666666666666667e-05, "loss": 1.0587, "step": 4335 }, { "epoch": 0.6375298658334865, "grad_norm": 0.7122284173965454, "learning_rate": 2.6626626626626627e-05, "loss": 1.0435, "step": 4336 }, { "epoch": 0.6376768976291123, "grad_norm": 0.490427166223526, "learning_rate": 2.6586586586586588e-05, "loss": 0.9666, "step": 4337 }, { "epoch": 0.6378239294247381, "grad_norm": 0.7436506748199463, "learning_rate": 2.6546546546546545e-05, "loss": 0.8685, "step": 4338 }, { "epoch": 0.6379709612203639, "grad_norm": 0.7010948061943054, "learning_rate": 2.6506506506506506e-05, "loss": 0.9763, "step": 4339 }, { "epoch": 0.6381179930159897, "grad_norm": 0.6567335724830627, "learning_rate": 2.6466466466466467e-05, "loss": 1.2395, "step": 4340 }, { "epoch": 0.6382650248116155, "grad_norm": 0.5527465343475342, "learning_rate": 2.6426426426426428e-05, "loss": 1.4492, "step": 4341 }, { "epoch": 0.6384120566072413, "grad_norm": 0.7382085919380188, "learning_rate": 2.6386386386386385e-05, "loss": 1.1181, "step": 4342 }, { "epoch": 0.6385590884028671, "grad_norm": 0.6320787668228149, "learning_rate": 2.6346346346346346e-05, "loss": 0.9587, "step": 4343 }, { "epoch": 0.6387061201984929, "grad_norm": 0.6082367897033691, "learning_rate": 2.6306306306306306e-05, "loss": 1.5938, "step": 4344 }, { "epoch": 0.6388531519941187, "grad_norm": 0.5495527982711792, "learning_rate": 2.6266266266266264e-05, "loss": 1.0596, "step": 4345 }, { "epoch": 0.6390001837897445, "grad_norm": 0.5915751457214355, "learning_rate": 2.6226226226226224e-05, "loss": 1.0441, "step": 4346 }, { "epoch": 0.6391472155853704, "grad_norm": 0.5526443719863892, "learning_rate": 2.618618618618619e-05, "loss": 0.9895, "step": 4347 }, { "epoch": 0.6392942473809962, "grad_norm": 0.781510591506958, "learning_rate": 2.614614614614615e-05, "loss": 0.7651, "step": 4348 }, { "epoch": 0.639441279176622, "grad_norm": 0.7700202465057373, "learning_rate": 2.610610610610611e-05, "loss": 0.8188, "step": 4349 }, { "epoch": 0.6395883109722478, "grad_norm": 0.5619633793830872, "learning_rate": 2.606606606606607e-05, "loss": 0.9251, "step": 4350 }, { "epoch": 0.6397353427678736, "grad_norm": 0.819821298122406, "learning_rate": 2.6026026026026028e-05, "loss": 1.0798, "step": 4351 }, { "epoch": 0.6398823745634994, "grad_norm": 0.6656141877174377, "learning_rate": 2.598598598598599e-05, "loss": 1.2198, "step": 4352 }, { "epoch": 0.6400294063591252, "grad_norm": 0.6069365739822388, "learning_rate": 2.594594594594595e-05, "loss": 0.9345, "step": 4353 }, { "epoch": 0.640176438154751, "grad_norm": 0.5042901039123535, "learning_rate": 2.5905905905905907e-05, "loss": 1.2103, "step": 4354 }, { "epoch": 0.6403234699503768, "grad_norm": 0.6816619634628296, "learning_rate": 2.5865865865865868e-05, "loss": 1.14, "step": 4355 }, { "epoch": 0.6404705017460026, "grad_norm": 0.44087740778923035, "learning_rate": 2.582582582582583e-05, "loss": 1.2113, "step": 4356 }, { "epoch": 0.6406175335416284, "grad_norm": 0.5141002535820007, "learning_rate": 2.578578578578579e-05, "loss": 1.2971, "step": 4357 }, { "epoch": 0.6407645653372542, "grad_norm": 0.9161085486412048, "learning_rate": 2.5745745745745747e-05, "loss": 0.9942, "step": 4358 }, { "epoch": 0.64091159713288, "grad_norm": 0.5710563063621521, "learning_rate": 2.5705705705705707e-05, "loss": 1.4083, "step": 4359 }, { "epoch": 0.6410586289285057, "grad_norm": 0.6281083226203918, "learning_rate": 2.5665665665665668e-05, "loss": 1.0793, "step": 4360 }, { "epoch": 0.6412056607241315, "grad_norm": 0.6219236850738525, "learning_rate": 2.5625625625625625e-05, "loss": 1.2502, "step": 4361 }, { "epoch": 0.6413526925197574, "grad_norm": 0.6630754470825195, "learning_rate": 2.5585585585585586e-05, "loss": 1.1464, "step": 4362 }, { "epoch": 0.6414997243153832, "grad_norm": 0.6456663608551025, "learning_rate": 2.5545545545545547e-05, "loss": 1.0612, "step": 4363 }, { "epoch": 0.641646756111009, "grad_norm": 0.9105227589607239, "learning_rate": 2.5505505505505508e-05, "loss": 0.7877, "step": 4364 }, { "epoch": 0.6417937879066348, "grad_norm": 0.4924158453941345, "learning_rate": 2.5465465465465465e-05, "loss": 1.6681, "step": 4365 }, { "epoch": 0.6419408197022606, "grad_norm": 0.4235648810863495, "learning_rate": 2.5425425425425426e-05, "loss": 1.1636, "step": 4366 }, { "epoch": 0.6420878514978864, "grad_norm": 0.7328130602836609, "learning_rate": 2.5385385385385386e-05, "loss": 1.0959, "step": 4367 }, { "epoch": 0.6422348832935122, "grad_norm": 0.6752737164497375, "learning_rate": 2.5345345345345344e-05, "loss": 1.2927, "step": 4368 }, { "epoch": 0.642381915089138, "grad_norm": 0.48201388120651245, "learning_rate": 2.5305305305305304e-05, "loss": 1.0957, "step": 4369 }, { "epoch": 0.6425289468847638, "grad_norm": 0.43398767709732056, "learning_rate": 2.5265265265265265e-05, "loss": 1.3838, "step": 4370 }, { "epoch": 0.6426759786803896, "grad_norm": 0.5419015884399414, "learning_rate": 2.5225225225225222e-05, "loss": 1.5815, "step": 4371 }, { "epoch": 0.6428230104760154, "grad_norm": 0.6833510398864746, "learning_rate": 2.5185185185185183e-05, "loss": 1.1133, "step": 4372 }, { "epoch": 0.6429700422716412, "grad_norm": 0.49623775482177734, "learning_rate": 2.5145145145145144e-05, "loss": 1.3564, "step": 4373 }, { "epoch": 0.643117074067267, "grad_norm": 0.6120020151138306, "learning_rate": 2.5105105105105108e-05, "loss": 1.0234, "step": 4374 }, { "epoch": 0.6432641058628928, "grad_norm": 0.45364585518836975, "learning_rate": 2.506506506506507e-05, "loss": 1.7103, "step": 4375 }, { "epoch": 0.6434111376585187, "grad_norm": 0.6972809433937073, "learning_rate": 2.502502502502503e-05, "loss": 1.4131, "step": 4376 }, { "epoch": 0.6435581694541445, "grad_norm": 0.6347463726997375, "learning_rate": 2.4984984984984987e-05, "loss": 1.016, "step": 4377 }, { "epoch": 0.6437052012497703, "grad_norm": 0.5980704426765442, "learning_rate": 2.4944944944944944e-05, "loss": 1.1226, "step": 4378 }, { "epoch": 0.6438522330453961, "grad_norm": 0.49305203557014465, "learning_rate": 2.4904904904904905e-05, "loss": 1.5052, "step": 4379 }, { "epoch": 0.6439992648410219, "grad_norm": 0.6258924007415771, "learning_rate": 2.486486486486487e-05, "loss": 0.836, "step": 4380 }, { "epoch": 0.6441462966366477, "grad_norm": 0.6105144023895264, "learning_rate": 2.4824824824824826e-05, "loss": 1.3557, "step": 4381 }, { "epoch": 0.6442933284322735, "grad_norm": 0.6137268543243408, "learning_rate": 2.4784784784784787e-05, "loss": 1.3778, "step": 4382 }, { "epoch": 0.6444403602278993, "grad_norm": 0.609563946723938, "learning_rate": 2.4744744744744748e-05, "loss": 1.0623, "step": 4383 }, { "epoch": 0.6445873920235251, "grad_norm": 0.7170994281768799, "learning_rate": 2.4704704704704705e-05, "loss": 1.1789, "step": 4384 }, { "epoch": 0.6447344238191509, "grad_norm": 0.391255259513855, "learning_rate": 2.4664664664664666e-05, "loss": 1.2913, "step": 4385 }, { "epoch": 0.6448814556147767, "grad_norm": 0.6667273044586182, "learning_rate": 2.4624624624624627e-05, "loss": 0.9463, "step": 4386 }, { "epoch": 0.6450284874104025, "grad_norm": 0.6285285949707031, "learning_rate": 2.4584584584584584e-05, "loss": 1.2969, "step": 4387 }, { "epoch": 0.6451755192060283, "grad_norm": 0.5973974466323853, "learning_rate": 2.4544544544544545e-05, "loss": 1.0496, "step": 4388 }, { "epoch": 0.6453225510016541, "grad_norm": 0.6927722692489624, "learning_rate": 2.4504504504504506e-05, "loss": 1.0796, "step": 4389 }, { "epoch": 0.64546958279728, "grad_norm": 0.721768856048584, "learning_rate": 2.4464464464464466e-05, "loss": 0.8653, "step": 4390 }, { "epoch": 0.6456166145929058, "grad_norm": 0.6296173930168152, "learning_rate": 2.4424424424424424e-05, "loss": 1.2991, "step": 4391 }, { "epoch": 0.6457636463885316, "grad_norm": 0.7494940757751465, "learning_rate": 2.4384384384384384e-05, "loss": 1.0246, "step": 4392 }, { "epoch": 0.6459106781841574, "grad_norm": 0.542172908782959, "learning_rate": 2.4344344344344345e-05, "loss": 1.149, "step": 4393 }, { "epoch": 0.6460577099797832, "grad_norm": 0.6459487676620483, "learning_rate": 2.4304304304304306e-05, "loss": 1.1756, "step": 4394 }, { "epoch": 0.646204741775409, "grad_norm": 0.6736636161804199, "learning_rate": 2.4264264264264267e-05, "loss": 1.2915, "step": 4395 }, { "epoch": 0.6463517735710347, "grad_norm": 0.4532468020915985, "learning_rate": 2.4224224224224227e-05, "loss": 1.4169, "step": 4396 }, { "epoch": 0.6464988053666605, "grad_norm": 0.5947092175483704, "learning_rate": 2.4184184184184185e-05, "loss": 1.337, "step": 4397 }, { "epoch": 0.6466458371622863, "grad_norm": 0.6125962138175964, "learning_rate": 2.4144144144144145e-05, "loss": 1.1764, "step": 4398 }, { "epoch": 0.6467928689579121, "grad_norm": 0.5419594645500183, "learning_rate": 2.4104104104104106e-05, "loss": 1.3248, "step": 4399 }, { "epoch": 0.6469399007535379, "grad_norm": 0.6240352988243103, "learning_rate": 2.4064064064064067e-05, "loss": 1.151, "step": 4400 }, { "epoch": 0.6470869325491637, "grad_norm": 0.6130089163780212, "learning_rate": 2.4024024024024024e-05, "loss": 1.0426, "step": 4401 }, { "epoch": 0.6472339643447895, "grad_norm": 0.42740631103515625, "learning_rate": 2.3983983983983985e-05, "loss": 1.3863, "step": 4402 }, { "epoch": 0.6473809961404153, "grad_norm": 0.771852970123291, "learning_rate": 2.3943943943943946e-05, "loss": 1.274, "step": 4403 }, { "epoch": 0.6475280279360411, "grad_norm": 0.4957602620124817, "learning_rate": 2.3903903903903903e-05, "loss": 1.5126, "step": 4404 }, { "epoch": 0.647675059731667, "grad_norm": 0.5528574585914612, "learning_rate": 2.3863863863863864e-05, "loss": 1.2501, "step": 4405 }, { "epoch": 0.6478220915272928, "grad_norm": 0.7043609619140625, "learning_rate": 2.3823823823823824e-05, "loss": 0.7999, "step": 4406 }, { "epoch": 0.6479691233229186, "grad_norm": 0.6435564756393433, "learning_rate": 2.3783783783783785e-05, "loss": 1.0441, "step": 4407 }, { "epoch": 0.6481161551185444, "grad_norm": 0.5277215242385864, "learning_rate": 2.3743743743743746e-05, "loss": 1.0167, "step": 4408 }, { "epoch": 0.6482631869141702, "grad_norm": 0.6103957295417786, "learning_rate": 2.3703703703703707e-05, "loss": 1.125, "step": 4409 }, { "epoch": 0.648410218709796, "grad_norm": 0.513533353805542, "learning_rate": 2.3663663663663664e-05, "loss": 0.8358, "step": 4410 }, { "epoch": 0.6485572505054218, "grad_norm": 0.5822516679763794, "learning_rate": 2.3623623623623625e-05, "loss": 1.1431, "step": 4411 }, { "epoch": 0.6487042823010476, "grad_norm": 0.6409050226211548, "learning_rate": 2.3583583583583585e-05, "loss": 1.277, "step": 4412 }, { "epoch": 0.6488513140966734, "grad_norm": 0.5538650751113892, "learning_rate": 2.3543543543543546e-05, "loss": 0.9478, "step": 4413 }, { "epoch": 0.6489983458922992, "grad_norm": 0.5799229741096497, "learning_rate": 2.3503503503503504e-05, "loss": 1.1035, "step": 4414 }, { "epoch": 0.649145377687925, "grad_norm": 0.7192962169647217, "learning_rate": 2.3463463463463464e-05, "loss": 1.2847, "step": 4415 }, { "epoch": 0.6492924094835508, "grad_norm": 0.5867130160331726, "learning_rate": 2.3423423423423425e-05, "loss": 1.3601, "step": 4416 }, { "epoch": 0.6494394412791766, "grad_norm": 0.5480564832687378, "learning_rate": 2.3383383383383382e-05, "loss": 1.3195, "step": 4417 }, { "epoch": 0.6495864730748024, "grad_norm": 0.6134840250015259, "learning_rate": 2.3343343343343343e-05, "loss": 1.3183, "step": 4418 }, { "epoch": 0.6497335048704282, "grad_norm": 0.7657390236854553, "learning_rate": 2.3303303303303304e-05, "loss": 1.1832, "step": 4419 }, { "epoch": 0.649880536666054, "grad_norm": 0.6595509052276611, "learning_rate": 2.3263263263263265e-05, "loss": 1.011, "step": 4420 }, { "epoch": 0.6500275684616799, "grad_norm": 0.4605445861816406, "learning_rate": 2.3223223223223225e-05, "loss": 1.316, "step": 4421 }, { "epoch": 0.6501746002573057, "grad_norm": 0.5856523513793945, "learning_rate": 2.3183183183183186e-05, "loss": 1.0506, "step": 4422 }, { "epoch": 0.6503216320529315, "grad_norm": 0.4314640164375305, "learning_rate": 2.3143143143143143e-05, "loss": 1.2238, "step": 4423 }, { "epoch": 0.6504686638485573, "grad_norm": 0.4652751684188843, "learning_rate": 2.3103103103103104e-05, "loss": 1.3266, "step": 4424 }, { "epoch": 0.6506156956441831, "grad_norm": 0.6975327730178833, "learning_rate": 2.3063063063063065e-05, "loss": 0.8775, "step": 4425 }, { "epoch": 0.6507627274398089, "grad_norm": 0.6016844511032104, "learning_rate": 2.3023023023023026e-05, "loss": 1.2336, "step": 4426 }, { "epoch": 0.6509097592354347, "grad_norm": 0.5109543800354004, "learning_rate": 2.2982982982982983e-05, "loss": 0.801, "step": 4427 }, { "epoch": 0.6510567910310605, "grad_norm": 0.7315151691436768, "learning_rate": 2.2942942942942944e-05, "loss": 1.1268, "step": 4428 }, { "epoch": 0.6512038228266863, "grad_norm": 0.6775028705596924, "learning_rate": 2.2902902902902904e-05, "loss": 1.068, "step": 4429 }, { "epoch": 0.6513508546223121, "grad_norm": 0.6274616122245789, "learning_rate": 2.2862862862862862e-05, "loss": 0.858, "step": 4430 }, { "epoch": 0.6514978864179379, "grad_norm": 0.6640826463699341, "learning_rate": 2.2822822822822822e-05, "loss": 1.0678, "step": 4431 }, { "epoch": 0.6516449182135636, "grad_norm": 0.5942308306694031, "learning_rate": 2.2782782782782783e-05, "loss": 1.4439, "step": 4432 }, { "epoch": 0.6517919500091894, "grad_norm": 0.6624845266342163, "learning_rate": 2.2742742742742744e-05, "loss": 1.251, "step": 4433 }, { "epoch": 0.6519389818048152, "grad_norm": 0.5664684176445007, "learning_rate": 2.2702702702702705e-05, "loss": 1.2522, "step": 4434 }, { "epoch": 0.652086013600441, "grad_norm": 0.3419259488582611, "learning_rate": 2.2662662662662665e-05, "loss": 1.7095, "step": 4435 }, { "epoch": 0.6522330453960669, "grad_norm": 0.6276178956031799, "learning_rate": 2.2622622622622626e-05, "loss": 1.0535, "step": 4436 }, { "epoch": 0.6523800771916927, "grad_norm": 0.5566128492355347, "learning_rate": 2.2582582582582583e-05, "loss": 1.52, "step": 4437 }, { "epoch": 0.6525271089873185, "grad_norm": 0.695844292640686, "learning_rate": 2.2542542542542544e-05, "loss": 1.0908, "step": 4438 }, { "epoch": 0.6526741407829443, "grad_norm": 0.7585718631744385, "learning_rate": 2.2502502502502505e-05, "loss": 0.8376, "step": 4439 }, { "epoch": 0.6528211725785701, "grad_norm": 0.6491594910621643, "learning_rate": 2.2462462462462462e-05, "loss": 1.2942, "step": 4440 }, { "epoch": 0.6529682043741959, "grad_norm": 0.7102609872817993, "learning_rate": 2.2422422422422423e-05, "loss": 0.7848, "step": 4441 }, { "epoch": 0.6531152361698217, "grad_norm": 0.7926822304725647, "learning_rate": 2.2382382382382384e-05, "loss": 0.7514, "step": 4442 }, { "epoch": 0.6532622679654475, "grad_norm": 0.5720759034156799, "learning_rate": 2.2342342342342344e-05, "loss": 1.0669, "step": 4443 }, { "epoch": 0.6534092997610733, "grad_norm": 0.543712317943573, "learning_rate": 2.2302302302302302e-05, "loss": 1.4517, "step": 4444 }, { "epoch": 0.6535563315566991, "grad_norm": 0.7294083833694458, "learning_rate": 2.2262262262262263e-05, "loss": 0.8914, "step": 4445 }, { "epoch": 0.6537033633523249, "grad_norm": 0.6117666959762573, "learning_rate": 2.2222222222222223e-05, "loss": 1.5039, "step": 4446 }, { "epoch": 0.6538503951479507, "grad_norm": 0.5169798731803894, "learning_rate": 2.2182182182182184e-05, "loss": 1.1755, "step": 4447 }, { "epoch": 0.6539974269435765, "grad_norm": 0.5265130996704102, "learning_rate": 2.2142142142142145e-05, "loss": 1.4978, "step": 4448 }, { "epoch": 0.6541444587392024, "grad_norm": 0.656061053276062, "learning_rate": 2.2102102102102105e-05, "loss": 1.0695, "step": 4449 }, { "epoch": 0.6542914905348282, "grad_norm": 0.7320990562438965, "learning_rate": 2.2062062062062063e-05, "loss": 1.1934, "step": 4450 }, { "epoch": 0.654438522330454, "grad_norm": 0.5910030603408813, "learning_rate": 2.2022022022022024e-05, "loss": 1.7463, "step": 4451 }, { "epoch": 0.6545855541260798, "grad_norm": 0.3541339039802551, "learning_rate": 2.1981981981981984e-05, "loss": 1.4786, "step": 4452 }, { "epoch": 0.6547325859217056, "grad_norm": 0.6711605787277222, "learning_rate": 2.194194194194194e-05, "loss": 1.2317, "step": 4453 }, { "epoch": 0.6548796177173314, "grad_norm": 0.41605570912361145, "learning_rate": 2.1901901901901902e-05, "loss": 1.4778, "step": 4454 }, { "epoch": 0.6550266495129572, "grad_norm": 0.5872729420661926, "learning_rate": 2.1861861861861863e-05, "loss": 1.0262, "step": 4455 }, { "epoch": 0.655173681308583, "grad_norm": 0.6413201093673706, "learning_rate": 2.1821821821821824e-05, "loss": 0.989, "step": 4456 }, { "epoch": 0.6553207131042088, "grad_norm": 0.641121506690979, "learning_rate": 2.178178178178178e-05, "loss": 0.8062, "step": 4457 }, { "epoch": 0.6554677448998346, "grad_norm": 0.5891222357749939, "learning_rate": 2.1741741741741742e-05, "loss": 1.3637, "step": 4458 }, { "epoch": 0.6556147766954604, "grad_norm": 0.5643410086631775, "learning_rate": 2.1701701701701703e-05, "loss": 0.9049, "step": 4459 }, { "epoch": 0.6557618084910862, "grad_norm": 0.487625390291214, "learning_rate": 2.166166166166166e-05, "loss": 1.0934, "step": 4460 }, { "epoch": 0.655908840286712, "grad_norm": 0.45959675312042236, "learning_rate": 2.1621621621621624e-05, "loss": 1.6488, "step": 4461 }, { "epoch": 0.6560558720823378, "grad_norm": 0.63741534948349, "learning_rate": 2.1581581581581585e-05, "loss": 1.0695, "step": 4462 }, { "epoch": 0.6562029038779637, "grad_norm": 0.7341814637184143, "learning_rate": 2.1541541541541542e-05, "loss": 0.9007, "step": 4463 }, { "epoch": 0.6563499356735895, "grad_norm": 0.5846258997917175, "learning_rate": 2.1501501501501503e-05, "loss": 1.1334, "step": 4464 }, { "epoch": 0.6564969674692153, "grad_norm": 0.622433066368103, "learning_rate": 2.1461461461461464e-05, "loss": 0.963, "step": 4465 }, { "epoch": 0.6566439992648411, "grad_norm": 0.7013433575630188, "learning_rate": 2.142142142142142e-05, "loss": 0.985, "step": 4466 }, { "epoch": 0.6567910310604669, "grad_norm": 0.6837100386619568, "learning_rate": 2.1381381381381382e-05, "loss": 1.0033, "step": 4467 }, { "epoch": 0.6569380628560926, "grad_norm": 0.4086858034133911, "learning_rate": 2.1341341341341342e-05, "loss": 1.0037, "step": 4468 }, { "epoch": 0.6570850946517184, "grad_norm": 0.5942022204399109, "learning_rate": 2.1301301301301303e-05, "loss": 1.2246, "step": 4469 }, { "epoch": 0.6572321264473442, "grad_norm": 0.5322518944740295, "learning_rate": 2.126126126126126e-05, "loss": 1.1946, "step": 4470 }, { "epoch": 0.65737915824297, "grad_norm": 0.7005709409713745, "learning_rate": 2.122122122122122e-05, "loss": 1.1385, "step": 4471 }, { "epoch": 0.6575261900385958, "grad_norm": 0.7940104007720947, "learning_rate": 2.1181181181181182e-05, "loss": 0.9968, "step": 4472 }, { "epoch": 0.6576732218342216, "grad_norm": 0.5625956654548645, "learning_rate": 2.114114114114114e-05, "loss": 1.2828, "step": 4473 }, { "epoch": 0.6578202536298474, "grad_norm": 0.4938793182373047, "learning_rate": 2.1101101101101103e-05, "loss": 1.1907, "step": 4474 }, { "epoch": 0.6579672854254732, "grad_norm": 0.592445969581604, "learning_rate": 2.1061061061061064e-05, "loss": 1.134, "step": 4475 }, { "epoch": 0.658114317221099, "grad_norm": 0.5371370911598206, "learning_rate": 2.102102102102102e-05, "loss": 1.2342, "step": 4476 }, { "epoch": 0.6582613490167248, "grad_norm": 0.5733585357666016, "learning_rate": 2.0980980980980982e-05, "loss": 1.2153, "step": 4477 }, { "epoch": 0.6584083808123506, "grad_norm": 0.5519729256629944, "learning_rate": 2.0940940940940943e-05, "loss": 1.0878, "step": 4478 }, { "epoch": 0.6585554126079765, "grad_norm": 0.6039322018623352, "learning_rate": 2.0900900900900904e-05, "loss": 0.9755, "step": 4479 }, { "epoch": 0.6587024444036023, "grad_norm": 0.6447420716285706, "learning_rate": 2.086086086086086e-05, "loss": 1.1904, "step": 4480 }, { "epoch": 0.6588494761992281, "grad_norm": 0.639284074306488, "learning_rate": 2.0820820820820822e-05, "loss": 1.457, "step": 4481 }, { "epoch": 0.6589965079948539, "grad_norm": 0.6594791412353516, "learning_rate": 2.0780780780780783e-05, "loss": 1.2971, "step": 4482 }, { "epoch": 0.6591435397904797, "grad_norm": 0.5814533233642578, "learning_rate": 2.074074074074074e-05, "loss": 1.1135, "step": 4483 }, { "epoch": 0.6592905715861055, "grad_norm": 0.6011852622032166, "learning_rate": 2.07007007007007e-05, "loss": 0.9732, "step": 4484 }, { "epoch": 0.6594376033817313, "grad_norm": 0.46250486373901367, "learning_rate": 2.066066066066066e-05, "loss": 1.1673, "step": 4485 }, { "epoch": 0.6595846351773571, "grad_norm": 0.6067174673080444, "learning_rate": 2.062062062062062e-05, "loss": 1.4711, "step": 4486 }, { "epoch": 0.6597316669729829, "grad_norm": 0.551114559173584, "learning_rate": 2.0580580580580583e-05, "loss": 1.0982, "step": 4487 }, { "epoch": 0.6598786987686087, "grad_norm": 0.6863186359405518, "learning_rate": 2.0540540540540544e-05, "loss": 1.0246, "step": 4488 }, { "epoch": 0.6600257305642345, "grad_norm": 0.6434509754180908, "learning_rate": 2.05005005005005e-05, "loss": 1.1428, "step": 4489 }, { "epoch": 0.6601727623598603, "grad_norm": 0.6451184153556824, "learning_rate": 2.046046046046046e-05, "loss": 0.9599, "step": 4490 }, { "epoch": 0.6603197941554861, "grad_norm": 0.6823612451553345, "learning_rate": 2.0420420420420422e-05, "loss": 1.3073, "step": 4491 }, { "epoch": 0.660466825951112, "grad_norm": 0.542483925819397, "learning_rate": 2.0380380380380383e-05, "loss": 1.526, "step": 4492 }, { "epoch": 0.6606138577467378, "grad_norm": 0.7237178683280945, "learning_rate": 2.034034034034034e-05, "loss": 1.2354, "step": 4493 }, { "epoch": 0.6607608895423636, "grad_norm": 0.51275634765625, "learning_rate": 2.03003003003003e-05, "loss": 1.4841, "step": 4494 }, { "epoch": 0.6609079213379894, "grad_norm": 0.43291813135147095, "learning_rate": 2.0260260260260262e-05, "loss": 1.5181, "step": 4495 }, { "epoch": 0.6610549531336152, "grad_norm": 0.5233009457588196, "learning_rate": 2.022022022022022e-05, "loss": 1.2077, "step": 4496 }, { "epoch": 0.661201984929241, "grad_norm": 0.8723666667938232, "learning_rate": 2.018018018018018e-05, "loss": 0.898, "step": 4497 }, { "epoch": 0.6613490167248668, "grad_norm": 0.5238919258117676, "learning_rate": 2.014014014014014e-05, "loss": 1.5041, "step": 4498 }, { "epoch": 0.6614960485204926, "grad_norm": 0.5993878245353699, "learning_rate": 2.01001001001001e-05, "loss": 0.8694, "step": 4499 }, { "epoch": 0.6616430803161184, "grad_norm": 0.7392860054969788, "learning_rate": 2.006006006006006e-05, "loss": 1.1094, "step": 4500 }, { "epoch": 0.6617901121117442, "grad_norm": 0.6313422322273254, "learning_rate": 2.0020020020020023e-05, "loss": 1.1757, "step": 4501 }, { "epoch": 0.66193714390737, "grad_norm": 0.6592509150505066, "learning_rate": 1.997997997997998e-05, "loss": 1.2206, "step": 4502 }, { "epoch": 0.6620841757029958, "grad_norm": 0.5248935222625732, "learning_rate": 1.993993993993994e-05, "loss": 1.6051, "step": 4503 }, { "epoch": 0.6622312074986216, "grad_norm": 0.3915221095085144, "learning_rate": 1.9899899899899902e-05, "loss": 1.1229, "step": 4504 }, { "epoch": 0.6623782392942473, "grad_norm": 0.6734794974327087, "learning_rate": 1.9859859859859863e-05, "loss": 0.8685, "step": 4505 }, { "epoch": 0.6625252710898731, "grad_norm": 0.43827033042907715, "learning_rate": 1.981981981981982e-05, "loss": 1.7297, "step": 4506 }, { "epoch": 0.6626723028854989, "grad_norm": 0.5908958315849304, "learning_rate": 1.977977977977978e-05, "loss": 1.0835, "step": 4507 }, { "epoch": 0.6628193346811247, "grad_norm": 0.4456884264945984, "learning_rate": 1.973973973973974e-05, "loss": 1.4228, "step": 4508 }, { "epoch": 0.6629663664767506, "grad_norm": 0.7308863997459412, "learning_rate": 1.96996996996997e-05, "loss": 1.0479, "step": 4509 }, { "epoch": 0.6631133982723764, "grad_norm": 0.6541045904159546, "learning_rate": 1.965965965965966e-05, "loss": 1.2005, "step": 4510 }, { "epoch": 0.6632604300680022, "grad_norm": 0.6136733889579773, "learning_rate": 1.961961961961962e-05, "loss": 1.1726, "step": 4511 }, { "epoch": 0.663407461863628, "grad_norm": 0.4907965064048767, "learning_rate": 1.957957957957958e-05, "loss": 1.0581, "step": 4512 }, { "epoch": 0.6635544936592538, "grad_norm": 0.6172305941581726, "learning_rate": 1.9539539539539538e-05, "loss": 1.0489, "step": 4513 }, { "epoch": 0.6637015254548796, "grad_norm": 0.46465784311294556, "learning_rate": 1.9499499499499502e-05, "loss": 1.5648, "step": 4514 }, { "epoch": 0.6638485572505054, "grad_norm": 0.5769606232643127, "learning_rate": 1.9459459459459463e-05, "loss": 1.3079, "step": 4515 }, { "epoch": 0.6639955890461312, "grad_norm": 0.5297374725341797, "learning_rate": 1.941941941941942e-05, "loss": 1.2323, "step": 4516 }, { "epoch": 0.664142620841757, "grad_norm": 0.45984816551208496, "learning_rate": 1.937937937937938e-05, "loss": 1.6281, "step": 4517 }, { "epoch": 0.6642896526373828, "grad_norm": 0.7893324494361877, "learning_rate": 1.9339339339339342e-05, "loss": 1.3457, "step": 4518 }, { "epoch": 0.6644366844330086, "grad_norm": 0.5361487865447998, "learning_rate": 1.92992992992993e-05, "loss": 1.6453, "step": 4519 }, { "epoch": 0.6645837162286344, "grad_norm": 0.5893986821174622, "learning_rate": 1.925925925925926e-05, "loss": 1.0567, "step": 4520 }, { "epoch": 0.6647307480242602, "grad_norm": 0.5617702603340149, "learning_rate": 1.921921921921922e-05, "loss": 1.1785, "step": 4521 }, { "epoch": 0.664877779819886, "grad_norm": 0.5060228109359741, "learning_rate": 1.9179179179179178e-05, "loss": 0.9389, "step": 4522 }, { "epoch": 0.6650248116155119, "grad_norm": 0.5043153166770935, "learning_rate": 1.913913913913914e-05, "loss": 1.5193, "step": 4523 }, { "epoch": 0.6651718434111377, "grad_norm": 0.6935690641403198, "learning_rate": 1.90990990990991e-05, "loss": 1.2567, "step": 4524 }, { "epoch": 0.6653188752067635, "grad_norm": 0.6475191712379456, "learning_rate": 1.905905905905906e-05, "loss": 1.4528, "step": 4525 }, { "epoch": 0.6654659070023893, "grad_norm": 0.8747141361236572, "learning_rate": 1.9019019019019018e-05, "loss": 1.2543, "step": 4526 }, { "epoch": 0.6656129387980151, "grad_norm": 0.4888969957828522, "learning_rate": 1.8978978978978982e-05, "loss": 1.5297, "step": 4527 }, { "epoch": 0.6657599705936409, "grad_norm": 0.5079041123390198, "learning_rate": 1.8938938938938942e-05, "loss": 1.1219, "step": 4528 }, { "epoch": 0.6659070023892667, "grad_norm": 0.6312013864517212, "learning_rate": 1.88988988988989e-05, "loss": 1.0354, "step": 4529 }, { "epoch": 0.6660540341848925, "grad_norm": 0.5871734023094177, "learning_rate": 1.885885885885886e-05, "loss": 1.1636, "step": 4530 }, { "epoch": 0.6662010659805183, "grad_norm": 0.7759671211242676, "learning_rate": 1.881881881881882e-05, "loss": 1.295, "step": 4531 }, { "epoch": 0.6663480977761441, "grad_norm": 0.6187940239906311, "learning_rate": 1.877877877877878e-05, "loss": 1.0954, "step": 4532 }, { "epoch": 0.6664951295717699, "grad_norm": 0.7364678382873535, "learning_rate": 1.873873873873874e-05, "loss": 0.9573, "step": 4533 }, { "epoch": 0.6666421613673957, "grad_norm": 0.7576497197151184, "learning_rate": 1.86986986986987e-05, "loss": 1.0514, "step": 4534 }, { "epoch": 0.6667891931630215, "grad_norm": 0.6005411148071289, "learning_rate": 1.865865865865866e-05, "loss": 1.2995, "step": 4535 }, { "epoch": 0.6669362249586474, "grad_norm": 0.681819498538971, "learning_rate": 1.8618618618618618e-05, "loss": 1.1013, "step": 4536 }, { "epoch": 0.6670832567542732, "grad_norm": 0.5928929448127747, "learning_rate": 1.857857857857858e-05, "loss": 1.1182, "step": 4537 }, { "epoch": 0.667230288549899, "grad_norm": 0.5428171157836914, "learning_rate": 1.853853853853854e-05, "loss": 1.1156, "step": 4538 }, { "epoch": 0.6673773203455248, "grad_norm": 0.62901771068573, "learning_rate": 1.8498498498498497e-05, "loss": 0.8391, "step": 4539 }, { "epoch": 0.6675243521411506, "grad_norm": 0.7728533148765564, "learning_rate": 1.8458458458458458e-05, "loss": 0.9363, "step": 4540 }, { "epoch": 0.6676713839367763, "grad_norm": 0.5123627185821533, "learning_rate": 1.8418418418418422e-05, "loss": 0.8675, "step": 4541 }, { "epoch": 0.6678184157324021, "grad_norm": 0.5705954432487488, "learning_rate": 1.837837837837838e-05, "loss": 1.2175, "step": 4542 }, { "epoch": 0.6679654475280279, "grad_norm": 0.6770231127738953, "learning_rate": 1.833833833833834e-05, "loss": 1.2507, "step": 4543 }, { "epoch": 0.6681124793236537, "grad_norm": 0.6653541922569275, "learning_rate": 1.82982982982983e-05, "loss": 1.1405, "step": 4544 }, { "epoch": 0.6682595111192795, "grad_norm": 0.4881608188152313, "learning_rate": 1.8258258258258258e-05, "loss": 1.0707, "step": 4545 }, { "epoch": 0.6684065429149053, "grad_norm": 0.48359254002571106, "learning_rate": 1.821821821821822e-05, "loss": 1.6177, "step": 4546 }, { "epoch": 0.6685535747105311, "grad_norm": 0.5093848705291748, "learning_rate": 1.817817817817818e-05, "loss": 1.1189, "step": 4547 }, { "epoch": 0.6687006065061569, "grad_norm": 0.3947668969631195, "learning_rate": 1.813813813813814e-05, "loss": 0.605, "step": 4548 }, { "epoch": 0.6688476383017827, "grad_norm": 0.6206204295158386, "learning_rate": 1.8098098098098098e-05, "loss": 1.0089, "step": 4549 }, { "epoch": 0.6689946700974085, "grad_norm": 0.595554530620575, "learning_rate": 1.8058058058058058e-05, "loss": 0.8742, "step": 4550 }, { "epoch": 0.6691417018930343, "grad_norm": 0.544804036617279, "learning_rate": 1.801801801801802e-05, "loss": 0.8426, "step": 4551 }, { "epoch": 0.6692887336886602, "grad_norm": 0.4829943776130676, "learning_rate": 1.7977977977977976e-05, "loss": 1.4593, "step": 4552 }, { "epoch": 0.669435765484286, "grad_norm": 0.7006345391273499, "learning_rate": 1.7937937937937937e-05, "loss": 0.9938, "step": 4553 }, { "epoch": 0.6695827972799118, "grad_norm": 0.6115264892578125, "learning_rate": 1.78978978978979e-05, "loss": 1.0428, "step": 4554 }, { "epoch": 0.6697298290755376, "grad_norm": 0.7454063296318054, "learning_rate": 1.785785785785786e-05, "loss": 1.0046, "step": 4555 }, { "epoch": 0.6698768608711634, "grad_norm": 0.47465962171554565, "learning_rate": 1.781781781781782e-05, "loss": 1.3009, "step": 4556 }, { "epoch": 0.6700238926667892, "grad_norm": 0.6555954813957214, "learning_rate": 1.777777777777778e-05, "loss": 0.8774, "step": 4557 }, { "epoch": 0.670170924462415, "grad_norm": 0.8689224720001221, "learning_rate": 1.773773773773774e-05, "loss": 0.9489, "step": 4558 }, { "epoch": 0.6703179562580408, "grad_norm": 0.4688512086868286, "learning_rate": 1.7697697697697698e-05, "loss": 1.2418, "step": 4559 }, { "epoch": 0.6704649880536666, "grad_norm": 0.6865743398666382, "learning_rate": 1.765765765765766e-05, "loss": 1.1047, "step": 4560 }, { "epoch": 0.6706120198492924, "grad_norm": 1.006150722503662, "learning_rate": 1.761761761761762e-05, "loss": 0.923, "step": 4561 }, { "epoch": 0.6707590516449182, "grad_norm": 0.6297377943992615, "learning_rate": 1.7577577577577577e-05, "loss": 1.1486, "step": 4562 }, { "epoch": 0.670906083440544, "grad_norm": 0.5961238741874695, "learning_rate": 1.7537537537537538e-05, "loss": 1.5267, "step": 4563 }, { "epoch": 0.6710531152361698, "grad_norm": 1.0608872175216675, "learning_rate": 1.74974974974975e-05, "loss": 0.9891, "step": 4564 }, { "epoch": 0.6712001470317956, "grad_norm": 0.5564234256744385, "learning_rate": 1.7457457457457456e-05, "loss": 0.9394, "step": 4565 }, { "epoch": 0.6713471788274215, "grad_norm": 0.6795702576637268, "learning_rate": 1.7417417417417416e-05, "loss": 1.0254, "step": 4566 }, { "epoch": 0.6714942106230473, "grad_norm": 0.6417767405509949, "learning_rate": 1.737737737737738e-05, "loss": 0.9521, "step": 4567 }, { "epoch": 0.6716412424186731, "grad_norm": 0.8042071461677551, "learning_rate": 1.7337337337337338e-05, "loss": 1.1131, "step": 4568 }, { "epoch": 0.6717882742142989, "grad_norm": 0.5623124241828918, "learning_rate": 1.72972972972973e-05, "loss": 1.5065, "step": 4569 }, { "epoch": 0.6719353060099247, "grad_norm": 0.5074882507324219, "learning_rate": 1.725725725725726e-05, "loss": 1.4233, "step": 4570 }, { "epoch": 0.6720823378055505, "grad_norm": 0.5218638777732849, "learning_rate": 1.721721721721722e-05, "loss": 0.9764, "step": 4571 }, { "epoch": 0.6722293696011763, "grad_norm": 0.6107816100120544, "learning_rate": 1.7177177177177177e-05, "loss": 0.8903, "step": 4572 }, { "epoch": 0.6723764013968021, "grad_norm": 0.5357725024223328, "learning_rate": 1.7137137137137138e-05, "loss": 0.9962, "step": 4573 }, { "epoch": 0.6725234331924279, "grad_norm": 0.6196500658988953, "learning_rate": 1.70970970970971e-05, "loss": 0.8876, "step": 4574 }, { "epoch": 0.6726704649880537, "grad_norm": 0.7252976298332214, "learning_rate": 1.7057057057057056e-05, "loss": 1.0834, "step": 4575 }, { "epoch": 0.6728174967836795, "grad_norm": 0.48104649782180786, "learning_rate": 1.7017017017017017e-05, "loss": 0.9657, "step": 4576 }, { "epoch": 0.6729645285793052, "grad_norm": 0.5875390768051147, "learning_rate": 1.6976976976976978e-05, "loss": 1.1828, "step": 4577 }, { "epoch": 0.673111560374931, "grad_norm": 0.5633286833763123, "learning_rate": 1.693693693693694e-05, "loss": 1.6678, "step": 4578 }, { "epoch": 0.6732585921705568, "grad_norm": 0.5910934209823608, "learning_rate": 1.6896896896896896e-05, "loss": 1.0616, "step": 4579 }, { "epoch": 0.6734056239661826, "grad_norm": 0.5001141428947449, "learning_rate": 1.6856856856856857e-05, "loss": 0.9211, "step": 4580 }, { "epoch": 0.6735526557618084, "grad_norm": 0.7066851258277893, "learning_rate": 1.6816816816816817e-05, "loss": 0.998, "step": 4581 }, { "epoch": 0.6736996875574343, "grad_norm": 0.5976254940032959, "learning_rate": 1.6776776776776778e-05, "loss": 1.5542, "step": 4582 }, { "epoch": 0.6738467193530601, "grad_norm": 0.6317145824432373, "learning_rate": 1.673673673673674e-05, "loss": 1.0173, "step": 4583 }, { "epoch": 0.6739937511486859, "grad_norm": 0.4639015793800354, "learning_rate": 1.66966966966967e-05, "loss": 1.0242, "step": 4584 }, { "epoch": 0.6741407829443117, "grad_norm": 0.7508028149604797, "learning_rate": 1.6656656656656657e-05, "loss": 1.4382, "step": 4585 }, { "epoch": 0.6742878147399375, "grad_norm": 0.5115826725959778, "learning_rate": 1.6616616616616618e-05, "loss": 1.1957, "step": 4586 }, { "epoch": 0.6744348465355633, "grad_norm": 0.6859282851219177, "learning_rate": 1.6576576576576578e-05, "loss": 1.0735, "step": 4587 }, { "epoch": 0.6745818783311891, "grad_norm": 0.6790472269058228, "learning_rate": 1.6536536536536536e-05, "loss": 1.3406, "step": 4588 }, { "epoch": 0.6747289101268149, "grad_norm": 0.5483352541923523, "learning_rate": 1.6496496496496496e-05, "loss": 1.0431, "step": 4589 }, { "epoch": 0.6748759419224407, "grad_norm": 0.5490705966949463, "learning_rate": 1.6456456456456457e-05, "loss": 1.1407, "step": 4590 }, { "epoch": 0.6750229737180665, "grad_norm": 0.7885764837265015, "learning_rate": 1.6416416416416418e-05, "loss": 1.2279, "step": 4591 }, { "epoch": 0.6751700055136923, "grad_norm": 0.6350445747375488, "learning_rate": 1.6376376376376375e-05, "loss": 1.2743, "step": 4592 }, { "epoch": 0.6753170373093181, "grad_norm": 0.5027633905410767, "learning_rate": 1.6336336336336336e-05, "loss": 1.1746, "step": 4593 }, { "epoch": 0.6754640691049439, "grad_norm": 0.5019071698188782, "learning_rate": 1.62962962962963e-05, "loss": 1.1821, "step": 4594 }, { "epoch": 0.6756111009005697, "grad_norm": 0.6577047109603882, "learning_rate": 1.6256256256256257e-05, "loss": 1.1066, "step": 4595 }, { "epoch": 0.6757581326961956, "grad_norm": 0.503029465675354, "learning_rate": 1.6216216216216218e-05, "loss": 1.2389, "step": 4596 }, { "epoch": 0.6759051644918214, "grad_norm": 0.8096226453781128, "learning_rate": 1.617617617617618e-05, "loss": 1.1242, "step": 4597 }, { "epoch": 0.6760521962874472, "grad_norm": 0.7372150421142578, "learning_rate": 1.6136136136136136e-05, "loss": 1.1576, "step": 4598 }, { "epoch": 0.676199228083073, "grad_norm": 0.7130180597305298, "learning_rate": 1.6096096096096097e-05, "loss": 1.168, "step": 4599 }, { "epoch": 0.6763462598786988, "grad_norm": 0.6887605786323547, "learning_rate": 1.6056056056056058e-05, "loss": 1.0434, "step": 4600 }, { "epoch": 0.6764932916743246, "grad_norm": 0.6255971789360046, "learning_rate": 1.6016016016016015e-05, "loss": 1.1384, "step": 4601 }, { "epoch": 0.6766403234699504, "grad_norm": 0.7630344033241272, "learning_rate": 1.5975975975975976e-05, "loss": 0.9924, "step": 4602 }, { "epoch": 0.6767873552655762, "grad_norm": 0.6329073905944824, "learning_rate": 1.5935935935935936e-05, "loss": 0.9745, "step": 4603 }, { "epoch": 0.676934387061202, "grad_norm": 0.7796976566314697, "learning_rate": 1.5895895895895897e-05, "loss": 0.7958, "step": 4604 }, { "epoch": 0.6770814188568278, "grad_norm": 0.44046416878700256, "learning_rate": 1.5855855855855855e-05, "loss": 1.0877, "step": 4605 }, { "epoch": 0.6772284506524536, "grad_norm": 0.6436654329299927, "learning_rate": 1.5815815815815815e-05, "loss": 1.2691, "step": 4606 }, { "epoch": 0.6773754824480794, "grad_norm": 0.5525913238525391, "learning_rate": 1.5775775775775776e-05, "loss": 1.2279, "step": 4607 }, { "epoch": 0.6775225142437052, "grad_norm": 0.6762361526489258, "learning_rate": 1.5735735735735737e-05, "loss": 0.8576, "step": 4608 }, { "epoch": 0.677669546039331, "grad_norm": 0.48389822244644165, "learning_rate": 1.5695695695695697e-05, "loss": 1.3591, "step": 4609 }, { "epoch": 0.6778165778349569, "grad_norm": 0.7271302938461304, "learning_rate": 1.5655655655655658e-05, "loss": 1.2229, "step": 4610 }, { "epoch": 0.6779636096305827, "grad_norm": 0.5005607008934021, "learning_rate": 1.5615615615615616e-05, "loss": 1.2372, "step": 4611 }, { "epoch": 0.6781106414262085, "grad_norm": 0.7729110717773438, "learning_rate": 1.5575575575575576e-05, "loss": 0.8218, "step": 4612 }, { "epoch": 0.6782576732218342, "grad_norm": 0.7826699614524841, "learning_rate": 1.5535535535535537e-05, "loss": 0.8248, "step": 4613 }, { "epoch": 0.67840470501746, "grad_norm": 0.7559046149253845, "learning_rate": 1.5495495495495498e-05, "loss": 0.8797, "step": 4614 }, { "epoch": 0.6785517368130858, "grad_norm": 0.6010035872459412, "learning_rate": 1.5455455455455455e-05, "loss": 1.0549, "step": 4615 }, { "epoch": 0.6786987686087116, "grad_norm": 0.6159489750862122, "learning_rate": 1.5415415415415416e-05, "loss": 1.0263, "step": 4616 }, { "epoch": 0.6788458004043374, "grad_norm": 0.7353917956352234, "learning_rate": 1.5375375375375377e-05, "loss": 0.9644, "step": 4617 }, { "epoch": 0.6789928321999632, "grad_norm": 0.5905298590660095, "learning_rate": 1.5335335335335334e-05, "loss": 1.318, "step": 4618 }, { "epoch": 0.679139863995589, "grad_norm": 0.580693781375885, "learning_rate": 1.5295295295295295e-05, "loss": 0.9651, "step": 4619 }, { "epoch": 0.6792868957912148, "grad_norm": 0.6178974509239197, "learning_rate": 1.5255255255255255e-05, "loss": 1.1602, "step": 4620 }, { "epoch": 0.6794339275868406, "grad_norm": 0.7614989280700684, "learning_rate": 1.5215215215215218e-05, "loss": 1.3651, "step": 4621 }, { "epoch": 0.6795809593824664, "grad_norm": 0.7622498273849487, "learning_rate": 1.5175175175175177e-05, "loss": 1.2913, "step": 4622 }, { "epoch": 0.6797279911780922, "grad_norm": 0.49617427587509155, "learning_rate": 1.5135135135135136e-05, "loss": 0.8051, "step": 4623 }, { "epoch": 0.679875022973718, "grad_norm": 0.6709746718406677, "learning_rate": 1.5095095095095097e-05, "loss": 1.2824, "step": 4624 }, { "epoch": 0.6800220547693439, "grad_norm": 0.6504947543144226, "learning_rate": 1.5055055055055056e-05, "loss": 0.9137, "step": 4625 }, { "epoch": 0.6801690865649697, "grad_norm": 0.49217408895492554, "learning_rate": 1.5015015015015016e-05, "loss": 1.477, "step": 4626 }, { "epoch": 0.6803161183605955, "grad_norm": 0.7266572713851929, "learning_rate": 1.4974974974974975e-05, "loss": 0.8057, "step": 4627 }, { "epoch": 0.6804631501562213, "grad_norm": 0.6570336818695068, "learning_rate": 1.4934934934934936e-05, "loss": 1.1451, "step": 4628 }, { "epoch": 0.6806101819518471, "grad_norm": 0.49877986311912537, "learning_rate": 1.4894894894894895e-05, "loss": 1.0395, "step": 4629 }, { "epoch": 0.6807572137474729, "grad_norm": 0.42845508456230164, "learning_rate": 1.4854854854854854e-05, "loss": 1.5233, "step": 4630 }, { "epoch": 0.6809042455430987, "grad_norm": 0.6454563140869141, "learning_rate": 1.4814814814814815e-05, "loss": 0.7373, "step": 4631 }, { "epoch": 0.6810512773387245, "grad_norm": 0.6364152431488037, "learning_rate": 1.4774774774774774e-05, "loss": 1.1507, "step": 4632 }, { "epoch": 0.6811983091343503, "grad_norm": 0.7260617613792419, "learning_rate": 1.4734734734734735e-05, "loss": 1.0271, "step": 4633 }, { "epoch": 0.6813453409299761, "grad_norm": 0.5984786152839661, "learning_rate": 1.4694694694694697e-05, "loss": 1.3472, "step": 4634 }, { "epoch": 0.6814923727256019, "grad_norm": 0.6113819479942322, "learning_rate": 1.4654654654654656e-05, "loss": 1.0411, "step": 4635 }, { "epoch": 0.6816394045212277, "grad_norm": 0.42095357179641724, "learning_rate": 1.4614614614614617e-05, "loss": 1.2506, "step": 4636 }, { "epoch": 0.6817864363168535, "grad_norm": 0.668673574924469, "learning_rate": 1.4574574574574576e-05, "loss": 1.0586, "step": 4637 }, { "epoch": 0.6819334681124793, "grad_norm": 0.6812184453010559, "learning_rate": 1.4534534534534535e-05, "loss": 0.9486, "step": 4638 }, { "epoch": 0.6820804999081052, "grad_norm": 0.6540973782539368, "learning_rate": 1.4494494494494496e-05, "loss": 1.2423, "step": 4639 }, { "epoch": 0.682227531703731, "grad_norm": 0.6201650500297546, "learning_rate": 1.4454454454454455e-05, "loss": 1.0727, "step": 4640 }, { "epoch": 0.6823745634993568, "grad_norm": 0.7139811515808105, "learning_rate": 1.4414414414414416e-05, "loss": 0.9594, "step": 4641 }, { "epoch": 0.6825215952949826, "grad_norm": 0.6398758292198181, "learning_rate": 1.4374374374374375e-05, "loss": 1.0311, "step": 4642 }, { "epoch": 0.6826686270906084, "grad_norm": 0.509259819984436, "learning_rate": 1.4334334334334335e-05, "loss": 1.1293, "step": 4643 }, { "epoch": 0.6828156588862342, "grad_norm": 0.6696454286575317, "learning_rate": 1.4294294294294294e-05, "loss": 1.2628, "step": 4644 }, { "epoch": 0.68296269068186, "grad_norm": 0.731802225112915, "learning_rate": 1.4254254254254253e-05, "loss": 0.8174, "step": 4645 }, { "epoch": 0.6831097224774858, "grad_norm": 0.6515408754348755, "learning_rate": 1.4214214214214214e-05, "loss": 1.0122, "step": 4646 }, { "epoch": 0.6832567542731116, "grad_norm": 0.6458297967910767, "learning_rate": 1.4174174174174173e-05, "loss": 1.543, "step": 4647 }, { "epoch": 0.6834037860687374, "grad_norm": 0.5500234961509705, "learning_rate": 1.4134134134134136e-05, "loss": 1.0063, "step": 4648 }, { "epoch": 0.6835508178643631, "grad_norm": 0.60899817943573, "learning_rate": 1.4094094094094096e-05, "loss": 1.1746, "step": 4649 }, { "epoch": 0.6836978496599889, "grad_norm": 0.5006976127624512, "learning_rate": 1.4054054054054055e-05, "loss": 1.5282, "step": 4650 }, { "epoch": 0.6838448814556147, "grad_norm": 0.6989748477935791, "learning_rate": 1.4014014014014016e-05, "loss": 1.4164, "step": 4651 }, { "epoch": 0.6839919132512405, "grad_norm": 0.7352039217948914, "learning_rate": 1.3973973973973975e-05, "loss": 1.1956, "step": 4652 }, { "epoch": 0.6841389450468663, "grad_norm": 0.7114779353141785, "learning_rate": 1.3933933933933934e-05, "loss": 0.9902, "step": 4653 }, { "epoch": 0.6842859768424921, "grad_norm": 0.7923265099525452, "learning_rate": 1.3893893893893895e-05, "loss": 1.0304, "step": 4654 }, { "epoch": 0.684433008638118, "grad_norm": 0.7012094855308533, "learning_rate": 1.3853853853853854e-05, "loss": 1.1552, "step": 4655 }, { "epoch": 0.6845800404337438, "grad_norm": 0.8495318293571472, "learning_rate": 1.3813813813813815e-05, "loss": 1.1656, "step": 4656 }, { "epoch": 0.6847270722293696, "grad_norm": 0.5163684487342834, "learning_rate": 1.3773773773773774e-05, "loss": 1.3262, "step": 4657 }, { "epoch": 0.6848741040249954, "grad_norm": 0.5047107338905334, "learning_rate": 1.3733733733733733e-05, "loss": 1.6845, "step": 4658 }, { "epoch": 0.6850211358206212, "grad_norm": 0.6433568000793457, "learning_rate": 1.3693693693693693e-05, "loss": 1.0951, "step": 4659 }, { "epoch": 0.685168167616247, "grad_norm": 0.5547446608543396, "learning_rate": 1.3653653653653653e-05, "loss": 1.3771, "step": 4660 }, { "epoch": 0.6853151994118728, "grad_norm": 0.5498102307319641, "learning_rate": 1.3613613613613615e-05, "loss": 0.8568, "step": 4661 }, { "epoch": 0.6854622312074986, "grad_norm": 0.6062688827514648, "learning_rate": 1.3573573573573576e-05, "loss": 1.2075, "step": 4662 }, { "epoch": 0.6856092630031244, "grad_norm": 0.7233086824417114, "learning_rate": 1.3533533533533535e-05, "loss": 1.0761, "step": 4663 }, { "epoch": 0.6857562947987502, "grad_norm": 0.5154514312744141, "learning_rate": 1.3493493493493495e-05, "loss": 1.0952, "step": 4664 }, { "epoch": 0.685903326594376, "grad_norm": 0.6517013311386108, "learning_rate": 1.3453453453453454e-05, "loss": 1.2035, "step": 4665 }, { "epoch": 0.6860503583900018, "grad_norm": 0.6013733744621277, "learning_rate": 1.3413413413413414e-05, "loss": 0.9705, "step": 4666 }, { "epoch": 0.6861973901856276, "grad_norm": 0.6972953081130981, "learning_rate": 1.3373373373373374e-05, "loss": 1.0287, "step": 4667 }, { "epoch": 0.6863444219812534, "grad_norm": 0.6242468953132629, "learning_rate": 1.3333333333333333e-05, "loss": 1.1739, "step": 4668 }, { "epoch": 0.6864914537768793, "grad_norm": 0.5201536417007446, "learning_rate": 1.3293293293293294e-05, "loss": 0.9337, "step": 4669 }, { "epoch": 0.6866384855725051, "grad_norm": 0.5854781270027161, "learning_rate": 1.3253253253253253e-05, "loss": 1.4373, "step": 4670 }, { "epoch": 0.6867855173681309, "grad_norm": 0.6185276508331299, "learning_rate": 1.3213213213213214e-05, "loss": 1.5362, "step": 4671 }, { "epoch": 0.6869325491637567, "grad_norm": 0.49644720554351807, "learning_rate": 1.3173173173173173e-05, "loss": 0.8254, "step": 4672 }, { "epoch": 0.6870795809593825, "grad_norm": 0.5689074993133545, "learning_rate": 1.3133133133133132e-05, "loss": 1.4937, "step": 4673 }, { "epoch": 0.6872266127550083, "grad_norm": 0.6923723816871643, "learning_rate": 1.3093093093093094e-05, "loss": 1.2641, "step": 4674 }, { "epoch": 0.6873736445506341, "grad_norm": 0.7463481426239014, "learning_rate": 1.3053053053053055e-05, "loss": 0.981, "step": 4675 }, { "epoch": 0.6875206763462599, "grad_norm": 0.5558549761772156, "learning_rate": 1.3013013013013014e-05, "loss": 1.216, "step": 4676 }, { "epoch": 0.6876677081418857, "grad_norm": 0.5385077595710754, "learning_rate": 1.2972972972972975e-05, "loss": 1.028, "step": 4677 }, { "epoch": 0.6878147399375115, "grad_norm": 0.607528805732727, "learning_rate": 1.2932932932932934e-05, "loss": 0.8695, "step": 4678 }, { "epoch": 0.6879617717331373, "grad_norm": 0.5237465500831604, "learning_rate": 1.2892892892892895e-05, "loss": 1.1727, "step": 4679 }, { "epoch": 0.6881088035287631, "grad_norm": 0.656228244304657, "learning_rate": 1.2852852852852854e-05, "loss": 0.9154, "step": 4680 }, { "epoch": 0.6882558353243889, "grad_norm": 0.5915724635124207, "learning_rate": 1.2812812812812813e-05, "loss": 1.0874, "step": 4681 }, { "epoch": 0.6884028671200147, "grad_norm": 0.6179999113082886, "learning_rate": 1.2772772772772773e-05, "loss": 0.8864, "step": 4682 }, { "epoch": 0.6885498989156406, "grad_norm": 0.5369471311569214, "learning_rate": 1.2732732732732732e-05, "loss": 1.1336, "step": 4683 }, { "epoch": 0.6886969307112664, "grad_norm": 0.4888911843299866, "learning_rate": 1.2692692692692693e-05, "loss": 1.3783, "step": 4684 }, { "epoch": 0.6888439625068921, "grad_norm": 0.7818719148635864, "learning_rate": 1.2652652652652652e-05, "loss": 1.1307, "step": 4685 }, { "epoch": 0.6889909943025179, "grad_norm": 0.6760531067848206, "learning_rate": 1.2612612612612611e-05, "loss": 0.8604, "step": 4686 }, { "epoch": 0.6891380260981437, "grad_norm": 0.5149998068809509, "learning_rate": 1.2572572572572572e-05, "loss": 1.2504, "step": 4687 }, { "epoch": 0.6892850578937695, "grad_norm": 0.9026519656181335, "learning_rate": 1.2532532532532534e-05, "loss": 1.0313, "step": 4688 }, { "epoch": 0.6894320896893953, "grad_norm": 0.6386526226997375, "learning_rate": 1.2492492492492493e-05, "loss": 1.11, "step": 4689 }, { "epoch": 0.6895791214850211, "grad_norm": 0.7867724895477295, "learning_rate": 1.2452452452452452e-05, "loss": 0.9697, "step": 4690 }, { "epoch": 0.6897261532806469, "grad_norm": 0.5547208189964294, "learning_rate": 1.2412412412412413e-05, "loss": 1.5044, "step": 4691 }, { "epoch": 0.6898731850762727, "grad_norm": 0.4509444534778595, "learning_rate": 1.2372372372372374e-05, "loss": 0.9668, "step": 4692 }, { "epoch": 0.6900202168718985, "grad_norm": 0.4549163579940796, "learning_rate": 1.2332332332332333e-05, "loss": 1.2202, "step": 4693 }, { "epoch": 0.6901672486675243, "grad_norm": 0.6304164528846741, "learning_rate": 1.2292292292292292e-05, "loss": 0.9792, "step": 4694 }, { "epoch": 0.6903142804631501, "grad_norm": 0.6592563986778259, "learning_rate": 1.2252252252252253e-05, "loss": 1.0427, "step": 4695 }, { "epoch": 0.6904613122587759, "grad_norm": 0.6708600521087646, "learning_rate": 1.2212212212212212e-05, "loss": 0.8643, "step": 4696 }, { "epoch": 0.6906083440544017, "grad_norm": 0.6284025311470032, "learning_rate": 1.2172172172172173e-05, "loss": 0.9988, "step": 4697 }, { "epoch": 0.6907553758500276, "grad_norm": 0.7270935773849487, "learning_rate": 1.2132132132132133e-05, "loss": 1.1323, "step": 4698 }, { "epoch": 0.6909024076456534, "grad_norm": 0.7445253729820251, "learning_rate": 1.2092092092092092e-05, "loss": 0.9536, "step": 4699 }, { "epoch": 0.6910494394412792, "grad_norm": 0.6176211833953857, "learning_rate": 1.2052052052052053e-05, "loss": 1.2321, "step": 4700 }, { "epoch": 0.691196471236905, "grad_norm": 0.545564591884613, "learning_rate": 1.2012012012012012e-05, "loss": 1.0796, "step": 4701 }, { "epoch": 0.6913435030325308, "grad_norm": 0.4900275468826294, "learning_rate": 1.1971971971971973e-05, "loss": 1.35, "step": 4702 }, { "epoch": 0.6914905348281566, "grad_norm": 0.6860318183898926, "learning_rate": 1.1931931931931932e-05, "loss": 1.299, "step": 4703 }, { "epoch": 0.6916375666237824, "grad_norm": 0.7189088463783264, "learning_rate": 1.1891891891891893e-05, "loss": 1.1502, "step": 4704 }, { "epoch": 0.6917845984194082, "grad_norm": 0.6243991851806641, "learning_rate": 1.1851851851851853e-05, "loss": 0.9751, "step": 4705 }, { "epoch": 0.691931630215034, "grad_norm": 0.6022056341171265, "learning_rate": 1.1811811811811812e-05, "loss": 1.2296, "step": 4706 }, { "epoch": 0.6920786620106598, "grad_norm": 0.6260016560554504, "learning_rate": 1.1771771771771773e-05, "loss": 1.3232, "step": 4707 }, { "epoch": 0.6922256938062856, "grad_norm": 0.43001341819763184, "learning_rate": 1.1731731731731732e-05, "loss": 1.129, "step": 4708 }, { "epoch": 0.6923727256019114, "grad_norm": 0.6090236306190491, "learning_rate": 1.1691691691691691e-05, "loss": 1.1153, "step": 4709 }, { "epoch": 0.6925197573975372, "grad_norm": 0.6332512497901917, "learning_rate": 1.1651651651651652e-05, "loss": 1.2401, "step": 4710 }, { "epoch": 0.692666789193163, "grad_norm": 0.48058363795280457, "learning_rate": 1.1611611611611613e-05, "loss": 1.2829, "step": 4711 }, { "epoch": 0.6928138209887889, "grad_norm": 0.6258144974708557, "learning_rate": 1.1571571571571572e-05, "loss": 0.8925, "step": 4712 }, { "epoch": 0.6929608527844147, "grad_norm": 0.436794251203537, "learning_rate": 1.1531531531531532e-05, "loss": 1.5933, "step": 4713 }, { "epoch": 0.6931078845800405, "grad_norm": 0.4678839147090912, "learning_rate": 1.1491491491491491e-05, "loss": 0.9616, "step": 4714 }, { "epoch": 0.6932549163756663, "grad_norm": 0.4710240066051483, "learning_rate": 1.1451451451451452e-05, "loss": 1.1795, "step": 4715 }, { "epoch": 0.6934019481712921, "grad_norm": 0.7333782911300659, "learning_rate": 1.1411411411411411e-05, "loss": 1.0186, "step": 4716 }, { "epoch": 0.6935489799669179, "grad_norm": 0.4025857746601105, "learning_rate": 1.1371371371371372e-05, "loss": 1.6632, "step": 4717 }, { "epoch": 0.6936960117625437, "grad_norm": 0.7182244062423706, "learning_rate": 1.1331331331331333e-05, "loss": 0.9291, "step": 4718 }, { "epoch": 0.6938430435581695, "grad_norm": 0.6934838891029358, "learning_rate": 1.1291291291291292e-05, "loss": 1.4032, "step": 4719 }, { "epoch": 0.6939900753537953, "grad_norm": 0.5921199917793274, "learning_rate": 1.1251251251251252e-05, "loss": 0.9857, "step": 4720 }, { "epoch": 0.6941371071494211, "grad_norm": 0.5566112995147705, "learning_rate": 1.1211211211211212e-05, "loss": 1.0004, "step": 4721 }, { "epoch": 0.6942841389450468, "grad_norm": 0.5625145435333252, "learning_rate": 1.1171171171171172e-05, "loss": 1.6883, "step": 4722 }, { "epoch": 0.6944311707406726, "grad_norm": 0.42089828848838806, "learning_rate": 1.1131131131131131e-05, "loss": 1.4938, "step": 4723 }, { "epoch": 0.6945782025362984, "grad_norm": 0.418562114238739, "learning_rate": 1.1091091091091092e-05, "loss": 1.4913, "step": 4724 }, { "epoch": 0.6947252343319242, "grad_norm": 0.3774343729019165, "learning_rate": 1.1051051051051053e-05, "loss": 1.5534, "step": 4725 }, { "epoch": 0.69487226612755, "grad_norm": 0.629325807094574, "learning_rate": 1.1011011011011012e-05, "loss": 1.4931, "step": 4726 }, { "epoch": 0.6950192979231758, "grad_norm": 0.3943207561969757, "learning_rate": 1.097097097097097e-05, "loss": 1.9451, "step": 4727 }, { "epoch": 0.6951663297188017, "grad_norm": 0.5050148367881775, "learning_rate": 1.0930930930930932e-05, "loss": 0.962, "step": 4728 }, { "epoch": 0.6953133615144275, "grad_norm": 0.6441386342048645, "learning_rate": 1.089089089089089e-05, "loss": 1.2731, "step": 4729 }, { "epoch": 0.6954603933100533, "grad_norm": 0.715423047542572, "learning_rate": 1.0850850850850851e-05, "loss": 0.813, "step": 4730 }, { "epoch": 0.6956074251056791, "grad_norm": 0.5756112933158875, "learning_rate": 1.0810810810810812e-05, "loss": 1.1226, "step": 4731 }, { "epoch": 0.6957544569013049, "grad_norm": 0.8386937379837036, "learning_rate": 1.0770770770770771e-05, "loss": 1.0605, "step": 4732 }, { "epoch": 0.6959014886969307, "grad_norm": 0.4576833248138428, "learning_rate": 1.0730730730730732e-05, "loss": 1.0445, "step": 4733 }, { "epoch": 0.6960485204925565, "grad_norm": 0.5189277529716492, "learning_rate": 1.0690690690690691e-05, "loss": 1.1603, "step": 4734 }, { "epoch": 0.6961955522881823, "grad_norm": 0.6907752752304077, "learning_rate": 1.0650650650650652e-05, "loss": 0.761, "step": 4735 }, { "epoch": 0.6963425840838081, "grad_norm": 0.5689253807067871, "learning_rate": 1.061061061061061e-05, "loss": 1.1016, "step": 4736 }, { "epoch": 0.6964896158794339, "grad_norm": 0.6815402507781982, "learning_rate": 1.057057057057057e-05, "loss": 1.2366, "step": 4737 }, { "epoch": 0.6966366476750597, "grad_norm": 0.5995508432388306, "learning_rate": 1.0530530530530532e-05, "loss": 1.3633, "step": 4738 }, { "epoch": 0.6967836794706855, "grad_norm": 0.6815082430839539, "learning_rate": 1.0490490490490491e-05, "loss": 1.1035, "step": 4739 }, { "epoch": 0.6969307112663113, "grad_norm": 0.5567446947097778, "learning_rate": 1.0450450450450452e-05, "loss": 1.2398, "step": 4740 }, { "epoch": 0.6970777430619371, "grad_norm": 0.5389345288276672, "learning_rate": 1.0410410410410411e-05, "loss": 1.2152, "step": 4741 }, { "epoch": 0.697224774857563, "grad_norm": 0.7926917672157288, "learning_rate": 1.037037037037037e-05, "loss": 0.9084, "step": 4742 }, { "epoch": 0.6973718066531888, "grad_norm": 0.7008495330810547, "learning_rate": 1.033033033033033e-05, "loss": 1.1421, "step": 4743 }, { "epoch": 0.6975188384488146, "grad_norm": 0.6069611310958862, "learning_rate": 1.0290290290290291e-05, "loss": 1.2662, "step": 4744 }, { "epoch": 0.6976658702444404, "grad_norm": 0.7000057697296143, "learning_rate": 1.025025025025025e-05, "loss": 1.421, "step": 4745 }, { "epoch": 0.6978129020400662, "grad_norm": 0.6728287935256958, "learning_rate": 1.0210210210210211e-05, "loss": 1.1578, "step": 4746 }, { "epoch": 0.697959933835692, "grad_norm": 0.7594440579414368, "learning_rate": 1.017017017017017e-05, "loss": 1.4397, "step": 4747 }, { "epoch": 0.6981069656313178, "grad_norm": 0.5390921831130981, "learning_rate": 1.0130130130130131e-05, "loss": 1.2044, "step": 4748 }, { "epoch": 0.6982539974269436, "grad_norm": 0.6537840962409973, "learning_rate": 1.009009009009009e-05, "loss": 1.1875, "step": 4749 }, { "epoch": 0.6984010292225694, "grad_norm": 0.6388206481933594, "learning_rate": 1.005005005005005e-05, "loss": 1.139, "step": 4750 }, { "epoch": 0.6985480610181952, "grad_norm": 0.4670422673225403, "learning_rate": 1.0010010010010011e-05, "loss": 1.6185, "step": 4751 }, { "epoch": 0.698695092813821, "grad_norm": 0.6562542915344238, "learning_rate": 9.96996996996997e-06, "loss": 1.1174, "step": 4752 }, { "epoch": 0.6988421246094468, "grad_norm": 0.5768960118293762, "learning_rate": 9.929929929929931e-06, "loss": 1.0299, "step": 4753 }, { "epoch": 0.6989891564050726, "grad_norm": 0.5748663544654846, "learning_rate": 9.88988988988989e-06, "loss": 1.2711, "step": 4754 }, { "epoch": 0.6991361882006984, "grad_norm": 0.39336830377578735, "learning_rate": 9.84984984984985e-06, "loss": 1.519, "step": 4755 }, { "epoch": 0.6992832199963243, "grad_norm": 0.615652322769165, "learning_rate": 9.80980980980981e-06, "loss": 1.4393, "step": 4756 }, { "epoch": 0.6994302517919501, "grad_norm": 0.7012773156166077, "learning_rate": 9.769769769769769e-06, "loss": 1.106, "step": 4757 }, { "epoch": 0.6995772835875758, "grad_norm": 0.5996436476707458, "learning_rate": 9.729729729729732e-06, "loss": 0.8525, "step": 4758 }, { "epoch": 0.6997243153832016, "grad_norm": 0.45107540488243103, "learning_rate": 9.68968968968969e-06, "loss": 1.4401, "step": 4759 }, { "epoch": 0.6998713471788274, "grad_norm": 0.5573094487190247, "learning_rate": 9.64964964964965e-06, "loss": 1.4764, "step": 4760 }, { "epoch": 0.7000183789744532, "grad_norm": 0.6246421337127686, "learning_rate": 9.60960960960961e-06, "loss": 1.072, "step": 4761 }, { "epoch": 0.700165410770079, "grad_norm": 0.396084189414978, "learning_rate": 9.56956956956957e-06, "loss": 1.7602, "step": 4762 }, { "epoch": 0.7003124425657048, "grad_norm": 0.6032221913337708, "learning_rate": 9.52952952952953e-06, "loss": 1.3621, "step": 4763 }, { "epoch": 0.7004594743613306, "grad_norm": 0.6196128726005554, "learning_rate": 9.489489489489491e-06, "loss": 1.1216, "step": 4764 }, { "epoch": 0.7006065061569564, "grad_norm": 0.8455750346183777, "learning_rate": 9.44944944944945e-06, "loss": 0.9158, "step": 4765 }, { "epoch": 0.7007535379525822, "grad_norm": 0.5908775925636292, "learning_rate": 9.40940940940941e-06, "loss": 1.4559, "step": 4766 }, { "epoch": 0.700900569748208, "grad_norm": 0.5480929613113403, "learning_rate": 9.36936936936937e-06, "loss": 0.6863, "step": 4767 }, { "epoch": 0.7010476015438338, "grad_norm": 0.6493448615074158, "learning_rate": 9.32932932932933e-06, "loss": 1.2858, "step": 4768 }, { "epoch": 0.7011946333394596, "grad_norm": 0.6093307137489319, "learning_rate": 9.28928928928929e-06, "loss": 1.2719, "step": 4769 }, { "epoch": 0.7013416651350854, "grad_norm": 0.8061597347259521, "learning_rate": 9.249249249249248e-06, "loss": 0.8847, "step": 4770 }, { "epoch": 0.7014886969307113, "grad_norm": 0.567462146282196, "learning_rate": 9.209209209209211e-06, "loss": 0.8519, "step": 4771 }, { "epoch": 0.7016357287263371, "grad_norm": 0.6819899082183838, "learning_rate": 9.16916916916917e-06, "loss": 1.0394, "step": 4772 }, { "epoch": 0.7017827605219629, "grad_norm": 0.4000406861305237, "learning_rate": 9.129129129129129e-06, "loss": 1.4494, "step": 4773 }, { "epoch": 0.7019297923175887, "grad_norm": 0.5413612127304077, "learning_rate": 9.08908908908909e-06, "loss": 1.6929, "step": 4774 }, { "epoch": 0.7020768241132145, "grad_norm": 0.6512224078178406, "learning_rate": 9.049049049049049e-06, "loss": 0.8545, "step": 4775 }, { "epoch": 0.7022238559088403, "grad_norm": 0.7475693821907043, "learning_rate": 9.00900900900901e-06, "loss": 0.9302, "step": 4776 }, { "epoch": 0.7023708877044661, "grad_norm": 0.6566969752311707, "learning_rate": 8.968968968968969e-06, "loss": 0.9383, "step": 4777 }, { "epoch": 0.7025179195000919, "grad_norm": 0.7932944297790527, "learning_rate": 8.92892892892893e-06, "loss": 1.3997, "step": 4778 }, { "epoch": 0.7026649512957177, "grad_norm": 0.43416523933410645, "learning_rate": 8.88888888888889e-06, "loss": 1.4699, "step": 4779 }, { "epoch": 0.7028119830913435, "grad_norm": 0.6515686511993408, "learning_rate": 8.848848848848849e-06, "loss": 1.3744, "step": 4780 }, { "epoch": 0.7029590148869693, "grad_norm": 0.49368369579315186, "learning_rate": 8.80880880880881e-06, "loss": 1.0424, "step": 4781 }, { "epoch": 0.7031060466825951, "grad_norm": 0.8274140954017639, "learning_rate": 8.768768768768769e-06, "loss": 1.2578, "step": 4782 }, { "epoch": 0.7032530784782209, "grad_norm": 0.5574043989181519, "learning_rate": 8.728728728728728e-06, "loss": 0.8729, "step": 4783 }, { "epoch": 0.7034001102738467, "grad_norm": 0.6545311808586121, "learning_rate": 8.68868868868869e-06, "loss": 0.9598, "step": 4784 }, { "epoch": 0.7035471420694726, "grad_norm": 0.5613349676132202, "learning_rate": 8.64864864864865e-06, "loss": 1.1249, "step": 4785 }, { "epoch": 0.7036941738650984, "grad_norm": 0.5201539397239685, "learning_rate": 8.60860860860861e-06, "loss": 1.0373, "step": 4786 }, { "epoch": 0.7038412056607242, "grad_norm": 0.4872850775718689, "learning_rate": 8.568568568568569e-06, "loss": 1.4882, "step": 4787 }, { "epoch": 0.70398823745635, "grad_norm": 0.6883703470230103, "learning_rate": 8.528528528528528e-06, "loss": 1.1936, "step": 4788 }, { "epoch": 0.7041352692519758, "grad_norm": 0.7159624099731445, "learning_rate": 8.488488488488489e-06, "loss": 1.002, "step": 4789 }, { "epoch": 0.7042823010476016, "grad_norm": 0.4242570996284485, "learning_rate": 8.448448448448448e-06, "loss": 1.3222, "step": 4790 }, { "epoch": 0.7044293328432274, "grad_norm": 0.6446855068206787, "learning_rate": 8.408408408408409e-06, "loss": 1.1771, "step": 4791 }, { "epoch": 0.7045763646388532, "grad_norm": 0.5693678855895996, "learning_rate": 8.36836836836837e-06, "loss": 1.1239, "step": 4792 }, { "epoch": 0.704723396434479, "grad_norm": 0.703027606010437, "learning_rate": 8.328328328328328e-06, "loss": 1.2706, "step": 4793 }, { "epoch": 0.7048704282301047, "grad_norm": 0.527990460395813, "learning_rate": 8.288288288288289e-06, "loss": 1.1907, "step": 4794 }, { "epoch": 0.7050174600257305, "grad_norm": 0.7958105206489563, "learning_rate": 8.248248248248248e-06, "loss": 0.8916, "step": 4795 }, { "epoch": 0.7051644918213563, "grad_norm": 0.5670918226242065, "learning_rate": 8.208208208208209e-06, "loss": 1.014, "step": 4796 }, { "epoch": 0.7053115236169821, "grad_norm": 0.5380349159240723, "learning_rate": 8.168168168168168e-06, "loss": 1.227, "step": 4797 }, { "epoch": 0.7054585554126079, "grad_norm": 0.6927957534790039, "learning_rate": 8.128128128128129e-06, "loss": 1.5442, "step": 4798 }, { "epoch": 0.7056055872082337, "grad_norm": 0.7635678648948669, "learning_rate": 8.08808808808809e-06, "loss": 1.2728, "step": 4799 }, { "epoch": 0.7057526190038595, "grad_norm": 0.7322510480880737, "learning_rate": 8.048048048048048e-06, "loss": 1.3544, "step": 4800 }, { "epoch": 0.7058996507994854, "grad_norm": 0.3985948860645294, "learning_rate": 8.008008008008007e-06, "loss": 1.5861, "step": 4801 }, { "epoch": 0.7060466825951112, "grad_norm": 0.5067280530929565, "learning_rate": 7.967967967967968e-06, "loss": 1.4518, "step": 4802 }, { "epoch": 0.706193714390737, "grad_norm": 0.5984216928482056, "learning_rate": 7.927927927927927e-06, "loss": 1.4833, "step": 4803 }, { "epoch": 0.7063407461863628, "grad_norm": 0.6710702180862427, "learning_rate": 7.887887887887888e-06, "loss": 0.8828, "step": 4804 }, { "epoch": 0.7064877779819886, "grad_norm": 0.4823906123638153, "learning_rate": 7.847847847847849e-06, "loss": 1.5997, "step": 4805 }, { "epoch": 0.7066348097776144, "grad_norm": 0.5790621638298035, "learning_rate": 7.807807807807808e-06, "loss": 1.1965, "step": 4806 }, { "epoch": 0.7067818415732402, "grad_norm": 0.5535963773727417, "learning_rate": 7.767767767767769e-06, "loss": 1.0814, "step": 4807 }, { "epoch": 0.706928873368866, "grad_norm": 0.48626866936683655, "learning_rate": 7.727727727727728e-06, "loss": 1.4898, "step": 4808 }, { "epoch": 0.7070759051644918, "grad_norm": 0.7139120101928711, "learning_rate": 7.687687687687688e-06, "loss": 0.9136, "step": 4809 }, { "epoch": 0.7072229369601176, "grad_norm": 0.6063426733016968, "learning_rate": 7.647647647647647e-06, "loss": 1.2629, "step": 4810 }, { "epoch": 0.7073699687557434, "grad_norm": 0.7624624967575073, "learning_rate": 7.607607607607609e-06, "loss": 1.0611, "step": 4811 }, { "epoch": 0.7075170005513692, "grad_norm": 0.550574779510498, "learning_rate": 7.567567567567568e-06, "loss": 1.5215, "step": 4812 }, { "epoch": 0.707664032346995, "grad_norm": 0.8475636839866638, "learning_rate": 7.527527527527528e-06, "loss": 0.8907, "step": 4813 }, { "epoch": 0.7078110641426208, "grad_norm": 0.7180082201957703, "learning_rate": 7.487487487487488e-06, "loss": 0.9644, "step": 4814 }, { "epoch": 0.7079580959382467, "grad_norm": 0.6092231273651123, "learning_rate": 7.447447447447448e-06, "loss": 1.4478, "step": 4815 }, { "epoch": 0.7081051277338725, "grad_norm": 0.42689308524131775, "learning_rate": 7.4074074074074075e-06, "loss": 1.6879, "step": 4816 }, { "epoch": 0.7082521595294983, "grad_norm": 0.9070277214050293, "learning_rate": 7.367367367367367e-06, "loss": 0.8811, "step": 4817 }, { "epoch": 0.7083991913251241, "grad_norm": 0.6886349320411682, "learning_rate": 7.327327327327328e-06, "loss": 0.8623, "step": 4818 }, { "epoch": 0.7085462231207499, "grad_norm": 0.6663856506347656, "learning_rate": 7.287287287287288e-06, "loss": 1.0881, "step": 4819 }, { "epoch": 0.7086932549163757, "grad_norm": 0.5463816523551941, "learning_rate": 7.247247247247248e-06, "loss": 0.9294, "step": 4820 }, { "epoch": 0.7088402867120015, "grad_norm": 0.7155319452285767, "learning_rate": 7.207207207207208e-06, "loss": 1.1895, "step": 4821 }, { "epoch": 0.7089873185076273, "grad_norm": 0.6011556386947632, "learning_rate": 7.167167167167168e-06, "loss": 1.5753, "step": 4822 }, { "epoch": 0.7091343503032531, "grad_norm": 0.7473141551017761, "learning_rate": 7.127127127127127e-06, "loss": 1.0063, "step": 4823 }, { "epoch": 0.7092813820988789, "grad_norm": 0.5789150595664978, "learning_rate": 7.0870870870870866e-06, "loss": 1.3156, "step": 4824 }, { "epoch": 0.7094284138945047, "grad_norm": 0.6396620273590088, "learning_rate": 7.047047047047048e-06, "loss": 0.8387, "step": 4825 }, { "epoch": 0.7095754456901305, "grad_norm": 0.4303806722164154, "learning_rate": 7.007007007007008e-06, "loss": 1.31, "step": 4826 }, { "epoch": 0.7097224774857563, "grad_norm": 0.8331279754638672, "learning_rate": 6.966966966966967e-06, "loss": 1.1405, "step": 4827 }, { "epoch": 0.7098695092813821, "grad_norm": 0.4489589035511017, "learning_rate": 6.926926926926927e-06, "loss": 1.2475, "step": 4828 }, { "epoch": 0.710016541077008, "grad_norm": 0.7450804114341736, "learning_rate": 6.886886886886887e-06, "loss": 0.9518, "step": 4829 }, { "epoch": 0.7101635728726337, "grad_norm": 0.7348255515098572, "learning_rate": 6.846846846846847e-06, "loss": 0.7775, "step": 4830 }, { "epoch": 0.7103106046682595, "grad_norm": 0.41175034642219543, "learning_rate": 6.8068068068068075e-06, "loss": 1.2117, "step": 4831 }, { "epoch": 0.7104576364638853, "grad_norm": 0.5689820647239685, "learning_rate": 6.766766766766767e-06, "loss": 0.9846, "step": 4832 }, { "epoch": 0.7106046682595111, "grad_norm": 0.46522438526153564, "learning_rate": 6.726726726726727e-06, "loss": 1.1037, "step": 4833 }, { "epoch": 0.7107517000551369, "grad_norm": 0.8428181409835815, "learning_rate": 6.686686686686687e-06, "loss": 1.0855, "step": 4834 }, { "epoch": 0.7108987318507627, "grad_norm": 0.5170731544494629, "learning_rate": 6.646646646646647e-06, "loss": 1.372, "step": 4835 }, { "epoch": 0.7110457636463885, "grad_norm": 0.529687225818634, "learning_rate": 6.606606606606607e-06, "loss": 0.9427, "step": 4836 }, { "epoch": 0.7111927954420143, "grad_norm": 0.6795393228530884, "learning_rate": 6.566566566566566e-06, "loss": 1.1147, "step": 4837 }, { "epoch": 0.7113398272376401, "grad_norm": 0.6347172856330872, "learning_rate": 6.5265265265265275e-06, "loss": 1.266, "step": 4838 }, { "epoch": 0.7114868590332659, "grad_norm": 0.6522908806800842, "learning_rate": 6.486486486486487e-06, "loss": 1.1316, "step": 4839 }, { "epoch": 0.7116338908288917, "grad_norm": 0.5680882334709167, "learning_rate": 6.446446446446447e-06, "loss": 0.9999, "step": 4840 }, { "epoch": 0.7117809226245175, "grad_norm": 0.7114275693893433, "learning_rate": 6.406406406406406e-06, "loss": 1.1068, "step": 4841 }, { "epoch": 0.7119279544201433, "grad_norm": 0.5043887495994568, "learning_rate": 6.366366366366366e-06, "loss": 1.5769, "step": 4842 }, { "epoch": 0.7120749862157691, "grad_norm": 0.6730671525001526, "learning_rate": 6.326326326326326e-06, "loss": 1.3552, "step": 4843 }, { "epoch": 0.712222018011395, "grad_norm": 0.6605958342552185, "learning_rate": 6.286286286286286e-06, "loss": 0.9454, "step": 4844 }, { "epoch": 0.7123690498070208, "grad_norm": 0.6411607265472412, "learning_rate": 6.246246246246247e-06, "loss": 1.1778, "step": 4845 }, { "epoch": 0.7125160816026466, "grad_norm": 0.551641047000885, "learning_rate": 6.206206206206207e-06, "loss": 0.866, "step": 4846 }, { "epoch": 0.7126631133982724, "grad_norm": 0.546031653881073, "learning_rate": 6.1661661661661665e-06, "loss": 1.1318, "step": 4847 }, { "epoch": 0.7128101451938982, "grad_norm": 0.6339780688285828, "learning_rate": 6.126126126126126e-06, "loss": 1.2178, "step": 4848 }, { "epoch": 0.712957176989524, "grad_norm": 0.6129500865936279, "learning_rate": 6.086086086086086e-06, "loss": 1.0801, "step": 4849 }, { "epoch": 0.7131042087851498, "grad_norm": 0.4868360757827759, "learning_rate": 6.046046046046046e-06, "loss": 1.4035, "step": 4850 }, { "epoch": 0.7132512405807756, "grad_norm": 0.48437029123306274, "learning_rate": 6.006006006006006e-06, "loss": 1.4611, "step": 4851 }, { "epoch": 0.7133982723764014, "grad_norm": 0.6922193169593811, "learning_rate": 5.965965965965966e-06, "loss": 1.1161, "step": 4852 }, { "epoch": 0.7135453041720272, "grad_norm": 0.48853909969329834, "learning_rate": 5.925925925925927e-06, "loss": 1.5849, "step": 4853 }, { "epoch": 0.713692335967653, "grad_norm": 0.6750875115394592, "learning_rate": 5.8858858858858865e-06, "loss": 1.008, "step": 4854 }, { "epoch": 0.7138393677632788, "grad_norm": 0.5698803067207336, "learning_rate": 5.845845845845846e-06, "loss": 0.9079, "step": 4855 }, { "epoch": 0.7139863995589046, "grad_norm": 0.5647537708282471, "learning_rate": 5.805805805805806e-06, "loss": 1.2769, "step": 4856 }, { "epoch": 0.7141334313545304, "grad_norm": 0.6443914771080017, "learning_rate": 5.765765765765766e-06, "loss": 1.2971, "step": 4857 }, { "epoch": 0.7142804631501563, "grad_norm": 0.5874809622764587, "learning_rate": 5.725725725725726e-06, "loss": 1.0405, "step": 4858 }, { "epoch": 0.7144274949457821, "grad_norm": 0.5935770869255066, "learning_rate": 5.685685685685686e-06, "loss": 1.2869, "step": 4859 }, { "epoch": 0.7145745267414079, "grad_norm": 0.4953063428401947, "learning_rate": 5.645645645645646e-06, "loss": 1.5336, "step": 4860 }, { "epoch": 0.7147215585370337, "grad_norm": 0.36027878522872925, "learning_rate": 5.605605605605606e-06, "loss": 1.1872, "step": 4861 }, { "epoch": 0.7148685903326595, "grad_norm": 0.5185924768447876, "learning_rate": 5.565565565565566e-06, "loss": 1.2875, "step": 4862 }, { "epoch": 0.7150156221282853, "grad_norm": 0.6035516858100891, "learning_rate": 5.525525525525526e-06, "loss": 1.4277, "step": 4863 }, { "epoch": 0.7151626539239111, "grad_norm": 0.5553094148635864, "learning_rate": 5.485485485485485e-06, "loss": 1.3287, "step": 4864 }, { "epoch": 0.7153096857195369, "grad_norm": 0.6213189959526062, "learning_rate": 5.445445445445445e-06, "loss": 0.9335, "step": 4865 }, { "epoch": 0.7154567175151626, "grad_norm": 0.5788190364837646, "learning_rate": 5.405405405405406e-06, "loss": 1.0111, "step": 4866 }, { "epoch": 0.7156037493107884, "grad_norm": 0.5156578421592712, "learning_rate": 5.365365365365366e-06, "loss": 1.3103, "step": 4867 }, { "epoch": 0.7157507811064142, "grad_norm": 0.49042651057243347, "learning_rate": 5.325325325325326e-06, "loss": 1.1024, "step": 4868 }, { "epoch": 0.71589781290204, "grad_norm": 0.6939415335655212, "learning_rate": 5.285285285285285e-06, "loss": 1.3197, "step": 4869 }, { "epoch": 0.7160448446976658, "grad_norm": 0.7665855288505554, "learning_rate": 5.2452452452452456e-06, "loss": 1.4261, "step": 4870 }, { "epoch": 0.7161918764932916, "grad_norm": 0.44681113958358765, "learning_rate": 5.2052052052052055e-06, "loss": 1.3907, "step": 4871 }, { "epoch": 0.7163389082889174, "grad_norm": 0.7512479424476624, "learning_rate": 5.165165165165165e-06, "loss": 1.2884, "step": 4872 }, { "epoch": 0.7164859400845432, "grad_norm": 0.4352273643016815, "learning_rate": 5.125125125125125e-06, "loss": 1.2472, "step": 4873 }, { "epoch": 0.7166329718801691, "grad_norm": 0.46873730421066284, "learning_rate": 5.085085085085085e-06, "loss": 1.4633, "step": 4874 }, { "epoch": 0.7167800036757949, "grad_norm": 0.5193659067153931, "learning_rate": 5.045045045045045e-06, "loss": 1.5759, "step": 4875 }, { "epoch": 0.7169270354714207, "grad_norm": 0.5168704390525818, "learning_rate": 5.005005005005006e-06, "loss": 1.3017, "step": 4876 }, { "epoch": 0.7170740672670465, "grad_norm": 0.45700693130493164, "learning_rate": 4.964964964964966e-06, "loss": 1.3861, "step": 4877 }, { "epoch": 0.7172210990626723, "grad_norm": 0.6002326607704163, "learning_rate": 4.924924924924925e-06, "loss": 1.4463, "step": 4878 }, { "epoch": 0.7173681308582981, "grad_norm": 0.5366953611373901, "learning_rate": 4.8848848848848846e-06, "loss": 1.1737, "step": 4879 }, { "epoch": 0.7175151626539239, "grad_norm": 0.7326520085334778, "learning_rate": 4.844844844844845e-06, "loss": 0.8333, "step": 4880 }, { "epoch": 0.7176621944495497, "grad_norm": 0.4022139310836792, "learning_rate": 4.804804804804805e-06, "loss": 1.3523, "step": 4881 }, { "epoch": 0.7178092262451755, "grad_norm": 0.6293331384658813, "learning_rate": 4.764764764764765e-06, "loss": 1.0606, "step": 4882 }, { "epoch": 0.7179562580408013, "grad_norm": 0.433308482170105, "learning_rate": 4.724724724724725e-06, "loss": 1.369, "step": 4883 }, { "epoch": 0.7181032898364271, "grad_norm": 0.6070244908332825, "learning_rate": 4.684684684684685e-06, "loss": 1.395, "step": 4884 }, { "epoch": 0.7182503216320529, "grad_norm": 0.6387912034988403, "learning_rate": 4.644644644644645e-06, "loss": 1.4627, "step": 4885 }, { "epoch": 0.7183973534276787, "grad_norm": 0.742790162563324, "learning_rate": 4.6046046046046055e-06, "loss": 1.2066, "step": 4886 }, { "epoch": 0.7185443852233045, "grad_norm": 0.5481721758842468, "learning_rate": 4.5645645645645645e-06, "loss": 1.1112, "step": 4887 }, { "epoch": 0.7186914170189304, "grad_norm": 0.4777321219444275, "learning_rate": 4.524524524524524e-06, "loss": 1.3698, "step": 4888 }, { "epoch": 0.7188384488145562, "grad_norm": 0.7695461511611938, "learning_rate": 4.484484484484484e-06, "loss": 1.0639, "step": 4889 }, { "epoch": 0.718985480610182, "grad_norm": 0.6035564541816711, "learning_rate": 4.444444444444445e-06, "loss": 0.9932, "step": 4890 }, { "epoch": 0.7191325124058078, "grad_norm": 0.7588567733764648, "learning_rate": 4.404404404404405e-06, "loss": 1.086, "step": 4891 }, { "epoch": 0.7192795442014336, "grad_norm": 0.6282131671905518, "learning_rate": 4.364364364364364e-06, "loss": 1.0573, "step": 4892 }, { "epoch": 0.7194265759970594, "grad_norm": 0.7284603118896484, "learning_rate": 4.324324324324325e-06, "loss": 1.1631, "step": 4893 }, { "epoch": 0.7195736077926852, "grad_norm": 0.5531557202339172, "learning_rate": 4.2842842842842845e-06, "loss": 0.918, "step": 4894 }, { "epoch": 0.719720639588311, "grad_norm": 0.6256856322288513, "learning_rate": 4.2442442442442444e-06, "loss": 1.0969, "step": 4895 }, { "epoch": 0.7198676713839368, "grad_norm": 0.4891110062599182, "learning_rate": 4.204204204204204e-06, "loss": 1.5339, "step": 4896 }, { "epoch": 0.7200147031795626, "grad_norm": 0.6516176462173462, "learning_rate": 4.164164164164164e-06, "loss": 0.8901, "step": 4897 }, { "epoch": 0.7201617349751884, "grad_norm": 0.508917510509491, "learning_rate": 4.124124124124124e-06, "loss": 1.8276, "step": 4898 }, { "epoch": 0.7203087667708142, "grad_norm": 0.5596896409988403, "learning_rate": 4.084084084084084e-06, "loss": 0.9757, "step": 4899 }, { "epoch": 0.72045579856644, "grad_norm": 0.6006924510002136, "learning_rate": 4.044044044044045e-06, "loss": 0.9969, "step": 4900 }, { "epoch": 0.7206028303620658, "grad_norm": 0.5713600516319275, "learning_rate": 4.004004004004004e-06, "loss": 0.9337, "step": 4901 }, { "epoch": 0.7207498621576915, "grad_norm": 0.5599650740623474, "learning_rate": 3.963963963963964e-06, "loss": 1.0201, "step": 4902 }, { "epoch": 0.7208968939533174, "grad_norm": 0.9650661945343018, "learning_rate": 3.923923923923924e-06, "loss": 1.0686, "step": 4903 }, { "epoch": 0.7210439257489432, "grad_norm": 0.6532158851623535, "learning_rate": 3.883883883883884e-06, "loss": 1.0734, "step": 4904 }, { "epoch": 0.721190957544569, "grad_norm": 0.4492966830730438, "learning_rate": 3.843843843843844e-06, "loss": 1.4353, "step": 4905 }, { "epoch": 0.7213379893401948, "grad_norm": 0.6201153993606567, "learning_rate": 3.8038038038038044e-06, "loss": 1.0339, "step": 4906 }, { "epoch": 0.7214850211358206, "grad_norm": 0.5962187051773071, "learning_rate": 3.763763763763764e-06, "loss": 0.946, "step": 4907 }, { "epoch": 0.7216320529314464, "grad_norm": 0.5661851167678833, "learning_rate": 3.723723723723724e-06, "loss": 1.0008, "step": 4908 }, { "epoch": 0.7217790847270722, "grad_norm": 0.7313871383666992, "learning_rate": 3.6836836836836837e-06, "loss": 1.1288, "step": 4909 }, { "epoch": 0.721926116522698, "grad_norm": 1.0546725988388062, "learning_rate": 3.643643643643644e-06, "loss": 1.1037, "step": 4910 }, { "epoch": 0.7220731483183238, "grad_norm": 0.7312703132629395, "learning_rate": 3.603603603603604e-06, "loss": 0.8792, "step": 4911 }, { "epoch": 0.7222201801139496, "grad_norm": 0.4744587242603302, "learning_rate": 3.5635635635635633e-06, "loss": 1.2259, "step": 4912 }, { "epoch": 0.7223672119095754, "grad_norm": 0.651309609413147, "learning_rate": 3.523523523523524e-06, "loss": 1.0794, "step": 4913 }, { "epoch": 0.7225142437052012, "grad_norm": 0.4482440948486328, "learning_rate": 3.4834834834834835e-06, "loss": 1.3381, "step": 4914 }, { "epoch": 0.722661275500827, "grad_norm": 0.7190211415290833, "learning_rate": 3.4434434434434434e-06, "loss": 0.9033, "step": 4915 }, { "epoch": 0.7228083072964528, "grad_norm": 0.5425156354904175, "learning_rate": 3.4034034034034037e-06, "loss": 1.6434, "step": 4916 }, { "epoch": 0.7229553390920787, "grad_norm": 0.541840672492981, "learning_rate": 3.3633633633633636e-06, "loss": 1.0586, "step": 4917 }, { "epoch": 0.7231023708877045, "grad_norm": 0.6316569447517395, "learning_rate": 3.3233233233233235e-06, "loss": 0.8851, "step": 4918 }, { "epoch": 0.7232494026833303, "grad_norm": 0.5687484741210938, "learning_rate": 3.283283283283283e-06, "loss": 1.0279, "step": 4919 }, { "epoch": 0.7233964344789561, "grad_norm": 0.5324671864509583, "learning_rate": 3.2432432432432437e-06, "loss": 1.0986, "step": 4920 }, { "epoch": 0.7235434662745819, "grad_norm": 0.8063516020774841, "learning_rate": 3.203203203203203e-06, "loss": 1.1648, "step": 4921 }, { "epoch": 0.7236904980702077, "grad_norm": 0.7343824505805969, "learning_rate": 3.163163163163163e-06, "loss": 0.924, "step": 4922 }, { "epoch": 0.7238375298658335, "grad_norm": 0.539222240447998, "learning_rate": 3.1231231231231234e-06, "loss": 1.4405, "step": 4923 }, { "epoch": 0.7239845616614593, "grad_norm": 0.536894679069519, "learning_rate": 3.0830830830830832e-06, "loss": 1.3221, "step": 4924 }, { "epoch": 0.7241315934570851, "grad_norm": 0.38193395733833313, "learning_rate": 3.043043043043043e-06, "loss": 1.512, "step": 4925 }, { "epoch": 0.7242786252527109, "grad_norm": 0.5945437550544739, "learning_rate": 3.003003003003003e-06, "loss": 1.0016, "step": 4926 }, { "epoch": 0.7244256570483367, "grad_norm": 0.5550779104232788, "learning_rate": 2.9629629629629633e-06, "loss": 1.1501, "step": 4927 }, { "epoch": 0.7245726888439625, "grad_norm": 0.7822507619857788, "learning_rate": 2.922922922922923e-06, "loss": 1.2525, "step": 4928 }, { "epoch": 0.7247197206395883, "grad_norm": 0.5713144540786743, "learning_rate": 2.882882882882883e-06, "loss": 1.4893, "step": 4929 }, { "epoch": 0.7248667524352141, "grad_norm": 0.8262823820114136, "learning_rate": 2.842842842842843e-06, "loss": 1.2384, "step": 4930 }, { "epoch": 0.72501378423084, "grad_norm": 0.632846474647522, "learning_rate": 2.802802802802803e-06, "loss": 1.0187, "step": 4931 }, { "epoch": 0.7251608160264658, "grad_norm": 0.7179580330848694, "learning_rate": 2.762762762762763e-06, "loss": 1.2768, "step": 4932 }, { "epoch": 0.7253078478220916, "grad_norm": 0.5237870216369629, "learning_rate": 2.7227227227227226e-06, "loss": 1.0763, "step": 4933 }, { "epoch": 0.7254548796177174, "grad_norm": 0.7858615517616272, "learning_rate": 2.682682682682683e-06, "loss": 1.1396, "step": 4934 }, { "epoch": 0.7256019114133432, "grad_norm": 0.5874305963516235, "learning_rate": 2.6426426426426424e-06, "loss": 1.3978, "step": 4935 }, { "epoch": 0.725748943208969, "grad_norm": 0.6390734314918518, "learning_rate": 2.6026026026026027e-06, "loss": 1.0346, "step": 4936 }, { "epoch": 0.7258959750045948, "grad_norm": 0.7040704488754272, "learning_rate": 2.5625625625625626e-06, "loss": 1.041, "step": 4937 }, { "epoch": 0.7260430068002206, "grad_norm": 0.6547687649726868, "learning_rate": 2.5225225225225225e-06, "loss": 1.1843, "step": 4938 }, { "epoch": 0.7261900385958463, "grad_norm": 0.6773231029510498, "learning_rate": 2.482482482482483e-06, "loss": 1.0266, "step": 4939 }, { "epoch": 0.7263370703914721, "grad_norm": 0.6795961856842041, "learning_rate": 2.4424424424424423e-06, "loss": 1.0171, "step": 4940 }, { "epoch": 0.7264841021870979, "grad_norm": 0.5472249984741211, "learning_rate": 2.4024024024024026e-06, "loss": 1.3394, "step": 4941 }, { "epoch": 0.7266311339827237, "grad_norm": 0.4432663917541504, "learning_rate": 2.3623623623623625e-06, "loss": 1.5829, "step": 4942 }, { "epoch": 0.7267781657783495, "grad_norm": 0.6520305275917053, "learning_rate": 2.3223223223223224e-06, "loss": 1.3185, "step": 4943 }, { "epoch": 0.7269251975739753, "grad_norm": 0.5278864502906799, "learning_rate": 2.2822822822822822e-06, "loss": 1.1325, "step": 4944 }, { "epoch": 0.7270722293696011, "grad_norm": 0.9458170533180237, "learning_rate": 2.242242242242242e-06, "loss": 1.0317, "step": 4945 }, { "epoch": 0.727219261165227, "grad_norm": 0.4967317581176758, "learning_rate": 2.2022022022022024e-06, "loss": 1.1116, "step": 4946 }, { "epoch": 0.7273662929608528, "grad_norm": 0.625003457069397, "learning_rate": 2.1621621621621623e-06, "loss": 0.958, "step": 4947 }, { "epoch": 0.7275133247564786, "grad_norm": 0.6260166764259338, "learning_rate": 2.1221221221221222e-06, "loss": 0.8117, "step": 4948 }, { "epoch": 0.7276603565521044, "grad_norm": 0.41517147421836853, "learning_rate": 2.082082082082082e-06, "loss": 1.5947, "step": 4949 }, { "epoch": 0.7278073883477302, "grad_norm": 0.5903592705726624, "learning_rate": 2.042042042042042e-06, "loss": 1.0348, "step": 4950 }, { "epoch": 0.727954420143356, "grad_norm": 0.6895639896392822, "learning_rate": 2.002002002002002e-06, "loss": 1.0764, "step": 4951 }, { "epoch": 0.7281014519389818, "grad_norm": 0.6782127022743225, "learning_rate": 1.961961961961962e-06, "loss": 1.037, "step": 4952 }, { "epoch": 0.7282484837346076, "grad_norm": 0.7095992565155029, "learning_rate": 1.921921921921922e-06, "loss": 0.9617, "step": 4953 }, { "epoch": 0.7283955155302334, "grad_norm": 0.5381293892860413, "learning_rate": 1.881881881881882e-06, "loss": 1.0632, "step": 4954 }, { "epoch": 0.7285425473258592, "grad_norm": 0.6589281558990479, "learning_rate": 1.8418418418418418e-06, "loss": 1.2332, "step": 4955 }, { "epoch": 0.728689579121485, "grad_norm": 0.70632004737854, "learning_rate": 1.801801801801802e-06, "loss": 1.3314, "step": 4956 }, { "epoch": 0.7288366109171108, "grad_norm": 0.5837297439575195, "learning_rate": 1.761761761761762e-06, "loss": 1.3918, "step": 4957 }, { "epoch": 0.7289836427127366, "grad_norm": 0.7660974860191345, "learning_rate": 1.7217217217217217e-06, "loss": 0.9728, "step": 4958 }, { "epoch": 0.7291306745083624, "grad_norm": 0.666443943977356, "learning_rate": 1.6816816816816818e-06, "loss": 0.8952, "step": 4959 }, { "epoch": 0.7292777063039882, "grad_norm": 0.5409120321273804, "learning_rate": 1.6416416416416415e-06, "loss": 1.3588, "step": 4960 }, { "epoch": 0.7294247380996141, "grad_norm": 0.6126105785369873, "learning_rate": 1.6016016016016016e-06, "loss": 0.8337, "step": 4961 }, { "epoch": 0.7295717698952399, "grad_norm": 0.621222972869873, "learning_rate": 1.5615615615615617e-06, "loss": 1.1682, "step": 4962 }, { "epoch": 0.7297188016908657, "grad_norm": 0.5410543084144592, "learning_rate": 1.5215215215215216e-06, "loss": 1.1886, "step": 4963 }, { "epoch": 0.7298658334864915, "grad_norm": 0.6338372230529785, "learning_rate": 1.4814814814814817e-06, "loss": 0.9814, "step": 4964 }, { "epoch": 0.7300128652821173, "grad_norm": 0.6976445913314819, "learning_rate": 1.4414414414414416e-06, "loss": 1.0539, "step": 4965 }, { "epoch": 0.7301598970777431, "grad_norm": 0.5901634097099304, "learning_rate": 1.4014014014014014e-06, "loss": 1.5385, "step": 4966 }, { "epoch": 0.7303069288733689, "grad_norm": 0.6377258896827698, "learning_rate": 1.3613613613613613e-06, "loss": 0.8974, "step": 4967 }, { "epoch": 0.7304539606689947, "grad_norm": 0.636111319065094, "learning_rate": 1.3213213213213212e-06, "loss": 0.9326, "step": 4968 }, { "epoch": 0.7306009924646205, "grad_norm": 0.6762256026268005, "learning_rate": 1.2812812812812813e-06, "loss": 1.1904, "step": 4969 }, { "epoch": 0.7307480242602463, "grad_norm": 0.665593683719635, "learning_rate": 1.2412412412412414e-06, "loss": 1.4498, "step": 4970 }, { "epoch": 0.7308950560558721, "grad_norm": 0.6244086027145386, "learning_rate": 1.2012012012012013e-06, "loss": 1.2675, "step": 4971 }, { "epoch": 0.7310420878514979, "grad_norm": 0.5667060613632202, "learning_rate": 1.1611611611611612e-06, "loss": 1.1512, "step": 4972 }, { "epoch": 0.7311891196471237, "grad_norm": 0.5728907585144043, "learning_rate": 1.121121121121121e-06, "loss": 0.988, "step": 4973 }, { "epoch": 0.7313361514427495, "grad_norm": 0.7327083349227905, "learning_rate": 1.0810810810810812e-06, "loss": 1.1384, "step": 4974 }, { "epoch": 0.7314831832383752, "grad_norm": 0.6096385717391968, "learning_rate": 1.041041041041041e-06, "loss": 1.5215, "step": 4975 }, { "epoch": 0.731630215034001, "grad_norm": 0.5352951288223267, "learning_rate": 1.001001001001001e-06, "loss": 1.2132, "step": 4976 }, { "epoch": 0.7317772468296269, "grad_norm": 0.63902348279953, "learning_rate": 9.60960960960961e-07, "loss": 1.2044, "step": 4977 }, { "epoch": 0.7319242786252527, "grad_norm": 0.7300208806991577, "learning_rate": 9.209209209209209e-07, "loss": 0.7599, "step": 4978 }, { "epoch": 0.7320713104208785, "grad_norm": 0.5334352254867554, "learning_rate": 8.80880880880881e-07, "loss": 1.0755, "step": 4979 }, { "epoch": 0.7322183422165043, "grad_norm": 0.6927406191825867, "learning_rate": 8.408408408408409e-07, "loss": 1.0057, "step": 4980 }, { "epoch": 0.7323653740121301, "grad_norm": 0.4365585744380951, "learning_rate": 8.008008008008008e-07, "loss": 1.6356, "step": 4981 }, { "epoch": 0.7325124058077559, "grad_norm": 0.6542779207229614, "learning_rate": 7.607607607607608e-07, "loss": 1.104, "step": 4982 }, { "epoch": 0.7326594376033817, "grad_norm": 0.509577214717865, "learning_rate": 7.207207207207208e-07, "loss": 1.2953, "step": 4983 }, { "epoch": 0.7328064693990075, "grad_norm": 0.5558030605316162, "learning_rate": 6.806806806806807e-07, "loss": 1.0236, "step": 4984 }, { "epoch": 0.7329535011946333, "grad_norm": 0.4987927973270416, "learning_rate": 6.406406406406407e-07, "loss": 1.4581, "step": 4985 }, { "epoch": 0.7331005329902591, "grad_norm": 0.6529518961906433, "learning_rate": 6.006006006006006e-07, "loss": 0.9289, "step": 4986 }, { "epoch": 0.7332475647858849, "grad_norm": 0.5340320467948914, "learning_rate": 5.605605605605605e-07, "loss": 1.4493, "step": 4987 }, { "epoch": 0.7333945965815107, "grad_norm": 0.451168417930603, "learning_rate": 5.205205205205205e-07, "loss": 1.7754, "step": 4988 }, { "epoch": 0.7335416283771365, "grad_norm": 0.5658267140388489, "learning_rate": 4.804804804804805e-07, "loss": 1.0288, "step": 4989 }, { "epoch": 0.7336886601727624, "grad_norm": 0.4356297552585602, "learning_rate": 4.404404404404405e-07, "loss": 1.4799, "step": 4990 }, { "epoch": 0.7338356919683882, "grad_norm": 0.6566826701164246, "learning_rate": 4.004004004004004e-07, "loss": 1.4561, "step": 4991 }, { "epoch": 0.733982723764014, "grad_norm": 0.4179493188858032, "learning_rate": 3.603603603603604e-07, "loss": 1.6062, "step": 4992 }, { "epoch": 0.7341297555596398, "grad_norm": 0.8616300225257874, "learning_rate": 3.2032032032032033e-07, "loss": 0.9389, "step": 4993 }, { "epoch": 0.7342767873552656, "grad_norm": 0.6610977053642273, "learning_rate": 2.8028028028028027e-07, "loss": 1.1322, "step": 4994 }, { "epoch": 0.7344238191508914, "grad_norm": 0.6668996214866638, "learning_rate": 2.4024024024024026e-07, "loss": 1.2973, "step": 4995 }, { "epoch": 0.7345708509465172, "grad_norm": 0.6065282821655273, "learning_rate": 2.002002002002002e-07, "loss": 1.2811, "step": 4996 }, { "epoch": 0.734717882742143, "grad_norm": 0.5197210907936096, "learning_rate": 1.6016016016016016e-07, "loss": 1.3361, "step": 4997 }, { "epoch": 0.7348649145377688, "grad_norm": 0.669392466545105, "learning_rate": 1.2012012012012013e-07, "loss": 1.3577, "step": 4998 }, { "epoch": 0.7350119463333946, "grad_norm": 0.6190925240516663, "learning_rate": 8.008008008008008e-08, "loss": 1.0674, "step": 4999 }, { "epoch": 0.7351589781290204, "grad_norm": 0.5323770642280579, "learning_rate": 4.004004004004004e-08, "loss": 1.1134, "step": 5000 } ], "logging_steps": 1, "max_steps": 5000, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.779140701961216e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }