{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 2144, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0009328358208955224, "grad_norm": 2.5716091355448696, "learning_rate": 9.999994632276776e-06, "loss": 0.2592, "step": 1 }, { "epoch": 0.0018656716417910447, "grad_norm": 1.4176872886936753, "learning_rate": 9.999978529118625e-06, "loss": 0.2314, "step": 2 }, { "epoch": 0.002798507462686567, "grad_norm": 1.2305751842002133, "learning_rate": 9.999951690560122e-06, "loss": 0.2169, "step": 3 }, { "epoch": 0.0037313432835820895, "grad_norm": 1.00972169523541, "learning_rate": 9.999914116658896e-06, "loss": 0.1988, "step": 4 }, { "epoch": 0.0046641791044776115, "grad_norm": 0.9298276426611971, "learning_rate": 9.999865807495616e-06, "loss": 0.2345, "step": 5 }, { "epoch": 0.005597014925373134, "grad_norm": 0.8733627901669683, "learning_rate": 9.999806763174009e-06, "loss": 0.2193, "step": 6 }, { "epoch": 0.0065298507462686565, "grad_norm": 0.8445790958346638, "learning_rate": 9.999736983820849e-06, "loss": 0.2245, "step": 7 }, { "epoch": 0.007462686567164179, "grad_norm": 0.9505212033567885, "learning_rate": 9.999656469585957e-06, "loss": 0.2147, "step": 8 }, { "epoch": 0.008395522388059701, "grad_norm": 0.872776782318509, "learning_rate": 9.999565220642204e-06, "loss": 0.2075, "step": 9 }, { "epoch": 0.009328358208955223, "grad_norm": 0.9553854856179482, "learning_rate": 9.999463237185512e-06, "loss": 0.1989, "step": 10 }, { "epoch": 0.010261194029850746, "grad_norm": 0.9057145675607191, "learning_rate": 9.999350519434845e-06, "loss": 0.2, "step": 11 }, { "epoch": 0.011194029850746268, "grad_norm": 0.7818237651299873, "learning_rate": 9.999227067632223e-06, "loss": 0.1613, "step": 12 }, { "epoch": 0.012126865671641791, "grad_norm": 0.8153167874569075, "learning_rate": 9.999092882042703e-06, "loss": 0.1985, "step": 13 }, { "epoch": 0.013059701492537313, "grad_norm": 0.8419608059043064, "learning_rate": 9.998947962954395e-06, "loss": 0.2243, "step": 14 }, { "epoch": 0.013992537313432836, "grad_norm": 0.7672463951634255, "learning_rate": 9.998792310678454e-06, "loss": 0.1978, "step": 15 }, { "epoch": 0.014925373134328358, "grad_norm": 0.80552513348339, "learning_rate": 9.99862592554908e-06, "loss": 0.1734, "step": 16 }, { "epoch": 0.01585820895522388, "grad_norm": 0.7811012898107669, "learning_rate": 9.998448807923517e-06, "loss": 0.2218, "step": 17 }, { "epoch": 0.016791044776119403, "grad_norm": 0.7601022739300933, "learning_rate": 9.998260958182048e-06, "loss": 0.1761, "step": 18 }, { "epoch": 0.017723880597014924, "grad_norm": 0.8384878038096356, "learning_rate": 9.99806237672801e-06, "loss": 0.2006, "step": 19 }, { "epoch": 0.018656716417910446, "grad_norm": 0.808167982498518, "learning_rate": 9.997853063987768e-06, "loss": 0.2194, "step": 20 }, { "epoch": 0.01958955223880597, "grad_norm": 0.7713728971078038, "learning_rate": 9.997633020410742e-06, "loss": 0.2116, "step": 21 }, { "epoch": 0.020522388059701493, "grad_norm": 0.7901190965242796, "learning_rate": 9.997402246469382e-06, "loss": 0.1987, "step": 22 }, { "epoch": 0.021455223880597014, "grad_norm": 0.7313163901386927, "learning_rate": 9.997160742659176e-06, "loss": 0.1946, "step": 23 }, { "epoch": 0.022388059701492536, "grad_norm": 0.7486130244284178, "learning_rate": 9.996908509498662e-06, "loss": 0.1837, "step": 24 }, { "epoch": 0.02332089552238806, "grad_norm": 0.7860056670023866, "learning_rate": 9.996645547529402e-06, "loss": 0.2564, "step": 25 }, { "epoch": 0.024253731343283583, "grad_norm": 0.7581662969012045, "learning_rate": 9.996371857316e-06, "loss": 0.1746, "step": 26 }, { "epoch": 0.025186567164179104, "grad_norm": 0.6975015526073557, "learning_rate": 9.996087439446094e-06, "loss": 0.1981, "step": 27 }, { "epoch": 0.026119402985074626, "grad_norm": 0.722915263538137, "learning_rate": 9.995792294530356e-06, "loss": 0.1873, "step": 28 }, { "epoch": 0.027052238805970148, "grad_norm": 0.7894033813519917, "learning_rate": 9.995486423202485e-06, "loss": 0.2037, "step": 29 }, { "epoch": 0.027985074626865673, "grad_norm": 0.7803730749245305, "learning_rate": 9.995169826119215e-06, "loss": 0.1698, "step": 30 }, { "epoch": 0.028917910447761194, "grad_norm": 0.7090013520734859, "learning_rate": 9.994842503960308e-06, "loss": 0.163, "step": 31 }, { "epoch": 0.029850746268656716, "grad_norm": 0.7671222221830503, "learning_rate": 9.994504457428557e-06, "loss": 0.1871, "step": 32 }, { "epoch": 0.030783582089552237, "grad_norm": 0.7181669982884149, "learning_rate": 9.994155687249775e-06, "loss": 0.1615, "step": 33 }, { "epoch": 0.03171641791044776, "grad_norm": 0.7656111290236678, "learning_rate": 9.993796194172806e-06, "loss": 0.1947, "step": 34 }, { "epoch": 0.03264925373134328, "grad_norm": 0.7708705346897703, "learning_rate": 9.993425978969508e-06, "loss": 0.1717, "step": 35 }, { "epoch": 0.033582089552238806, "grad_norm": 0.7372839782886961, "learning_rate": 9.993045042434772e-06, "loss": 0.1607, "step": 36 }, { "epoch": 0.03451492537313433, "grad_norm": 0.7622165179731982, "learning_rate": 9.9926533853865e-06, "loss": 0.2001, "step": 37 }, { "epoch": 0.03544776119402985, "grad_norm": 0.7561087454836221, "learning_rate": 9.992251008665613e-06, "loss": 0.1628, "step": 38 }, { "epoch": 0.036380597014925374, "grad_norm": 0.7805984201661516, "learning_rate": 9.991837913136053e-06, "loss": 0.1936, "step": 39 }, { "epoch": 0.03731343283582089, "grad_norm": 0.7967976647022951, "learning_rate": 9.99141409968477e-06, "loss": 0.2534, "step": 40 }, { "epoch": 0.03824626865671642, "grad_norm": 0.8201729473160019, "learning_rate": 9.990979569221733e-06, "loss": 0.1935, "step": 41 }, { "epoch": 0.03917910447761194, "grad_norm": 0.7716282919548366, "learning_rate": 9.990534322679915e-06, "loss": 0.1746, "step": 42 }, { "epoch": 0.04011194029850746, "grad_norm": 0.7741299807005129, "learning_rate": 9.9900783610153e-06, "loss": 0.1985, "step": 43 }, { "epoch": 0.041044776119402986, "grad_norm": 0.8018266912486479, "learning_rate": 9.989611685206881e-06, "loss": 0.1774, "step": 44 }, { "epoch": 0.04197761194029851, "grad_norm": 0.7247121362637202, "learning_rate": 9.989134296256648e-06, "loss": 0.1561, "step": 45 }, { "epoch": 0.04291044776119403, "grad_norm": 0.7976683889132362, "learning_rate": 9.988646195189604e-06, "loss": 0.1924, "step": 46 }, { "epoch": 0.043843283582089554, "grad_norm": 0.7960757679950935, "learning_rate": 9.988147383053739e-06, "loss": 0.2126, "step": 47 }, { "epoch": 0.04477611940298507, "grad_norm": 0.8270428552329929, "learning_rate": 9.987637860920053e-06, "loss": 0.2075, "step": 48 }, { "epoch": 0.0457089552238806, "grad_norm": 0.6589596069467158, "learning_rate": 9.98711762988253e-06, "loss": 0.1537, "step": 49 }, { "epoch": 0.04664179104477612, "grad_norm": 0.8032543321038746, "learning_rate": 9.986586691058156e-06, "loss": 0.1738, "step": 50 }, { "epoch": 0.04757462686567164, "grad_norm": 0.7338478698902282, "learning_rate": 9.986045045586904e-06, "loss": 0.163, "step": 51 }, { "epoch": 0.048507462686567165, "grad_norm": 0.7070647060347579, "learning_rate": 9.985492694631733e-06, "loss": 0.1522, "step": 52 }, { "epoch": 0.049440298507462684, "grad_norm": 0.7738887660522782, "learning_rate": 9.984929639378593e-06, "loss": 0.1963, "step": 53 }, { "epoch": 0.05037313432835821, "grad_norm": 0.8284733062238204, "learning_rate": 9.98435588103641e-06, "loss": 0.2199, "step": 54 }, { "epoch": 0.051305970149253734, "grad_norm": 0.7524048963217287, "learning_rate": 9.983771420837098e-06, "loss": 0.1829, "step": 55 }, { "epoch": 0.05223880597014925, "grad_norm": 0.6733441938187664, "learning_rate": 9.983176260035546e-06, "loss": 0.1478, "step": 56 }, { "epoch": 0.05317164179104478, "grad_norm": 0.8021676298339038, "learning_rate": 9.982570399909612e-06, "loss": 0.183, "step": 57 }, { "epoch": 0.054104477611940295, "grad_norm": 0.7273357698831805, "learning_rate": 9.981953841760137e-06, "loss": 0.1704, "step": 58 }, { "epoch": 0.05503731343283582, "grad_norm": 0.7101599169161613, "learning_rate": 9.981326586910921e-06, "loss": 0.1605, "step": 59 }, { "epoch": 0.055970149253731345, "grad_norm": 0.7387314808908781, "learning_rate": 9.980688636708744e-06, "loss": 0.1517, "step": 60 }, { "epoch": 0.05690298507462686, "grad_norm": 0.81337027667646, "learning_rate": 9.980039992523335e-06, "loss": 0.1701, "step": 61 }, { "epoch": 0.05783582089552239, "grad_norm": 0.7692443006131827, "learning_rate": 9.979380655747395e-06, "loss": 0.1581, "step": 62 }, { "epoch": 0.058768656716417914, "grad_norm": 0.7503070529897649, "learning_rate": 9.978710627796577e-06, "loss": 0.1877, "step": 63 }, { "epoch": 0.05970149253731343, "grad_norm": 0.8316590077952102, "learning_rate": 9.978029910109491e-06, "loss": 0.2433, "step": 64 }, { "epoch": 0.06063432835820896, "grad_norm": 0.785356218632406, "learning_rate": 9.9773385041477e-06, "loss": 0.1657, "step": 65 }, { "epoch": 0.061567164179104475, "grad_norm": 0.7216389299030854, "learning_rate": 9.976636411395712e-06, "loss": 0.194, "step": 66 }, { "epoch": 0.0625, "grad_norm": 0.6804181250919621, "learning_rate": 9.975923633360985e-06, "loss": 0.1536, "step": 67 }, { "epoch": 0.06343283582089553, "grad_norm": 0.75664829210671, "learning_rate": 9.975200171573917e-06, "loss": 0.2002, "step": 68 }, { "epoch": 0.06436567164179105, "grad_norm": 0.7785289161498496, "learning_rate": 9.974466027587844e-06, "loss": 0.1706, "step": 69 }, { "epoch": 0.06529850746268656, "grad_norm": 0.7187003384325596, "learning_rate": 9.973721202979038e-06, "loss": 0.1863, "step": 70 }, { "epoch": 0.06623134328358209, "grad_norm": 0.8519236679312661, "learning_rate": 9.972965699346705e-06, "loss": 0.1732, "step": 71 }, { "epoch": 0.06716417910447761, "grad_norm": 0.7662064325794146, "learning_rate": 9.972199518312979e-06, "loss": 0.1605, "step": 72 }, { "epoch": 0.06809701492537314, "grad_norm": 0.8234198750393837, "learning_rate": 9.971422661522919e-06, "loss": 0.1814, "step": 73 }, { "epoch": 0.06902985074626866, "grad_norm": 0.7904084478962242, "learning_rate": 9.970635130644507e-06, "loss": 0.1555, "step": 74 }, { "epoch": 0.06996268656716417, "grad_norm": 0.6764185746614678, "learning_rate": 9.96983692736864e-06, "loss": 0.1749, "step": 75 }, { "epoch": 0.0708955223880597, "grad_norm": 0.6991842282927949, "learning_rate": 9.969028053409131e-06, "loss": 0.159, "step": 76 }, { "epoch": 0.07182835820895522, "grad_norm": 0.7818938491835299, "learning_rate": 9.968208510502708e-06, "loss": 0.2043, "step": 77 }, { "epoch": 0.07276119402985075, "grad_norm": 0.7623447194933451, "learning_rate": 9.967378300408998e-06, "loss": 0.1382, "step": 78 }, { "epoch": 0.07369402985074627, "grad_norm": 0.7077959187088998, "learning_rate": 9.966537424910542e-06, "loss": 0.1758, "step": 79 }, { "epoch": 0.07462686567164178, "grad_norm": 0.8018768480857912, "learning_rate": 9.965685885812773e-06, "loss": 0.219, "step": 80 }, { "epoch": 0.07555970149253731, "grad_norm": 0.7264201884140605, "learning_rate": 9.964823684944017e-06, "loss": 0.1657, "step": 81 }, { "epoch": 0.07649253731343283, "grad_norm": 0.7912758181651456, "learning_rate": 9.963950824155502e-06, "loss": 0.1695, "step": 82 }, { "epoch": 0.07742537313432836, "grad_norm": 0.8158169353453767, "learning_rate": 9.963067305321334e-06, "loss": 0.1886, "step": 83 }, { "epoch": 0.07835820895522388, "grad_norm": 0.752167845909033, "learning_rate": 9.96217313033851e-06, "loss": 0.213, "step": 84 }, { "epoch": 0.07929104477611941, "grad_norm": 0.7932221975268756, "learning_rate": 9.961268301126902e-06, "loss": 0.2128, "step": 85 }, { "epoch": 0.08022388059701492, "grad_norm": 0.7164474945664365, "learning_rate": 9.960352819629259e-06, "loss": 0.172, "step": 86 }, { "epoch": 0.08115671641791045, "grad_norm": 0.7700398342535782, "learning_rate": 9.959426687811202e-06, "loss": 0.1628, "step": 87 }, { "epoch": 0.08208955223880597, "grad_norm": 0.6987042466160981, "learning_rate": 9.958489907661217e-06, "loss": 0.1714, "step": 88 }, { "epoch": 0.0830223880597015, "grad_norm": 0.6495218177876403, "learning_rate": 9.957542481190656e-06, "loss": 0.1594, "step": 89 }, { "epoch": 0.08395522388059702, "grad_norm": 0.7631422478824094, "learning_rate": 9.95658441043373e-06, "loss": 0.201, "step": 90 }, { "epoch": 0.08488805970149253, "grad_norm": 0.7256360608060394, "learning_rate": 9.955615697447499e-06, "loss": 0.1868, "step": 91 }, { "epoch": 0.08582089552238806, "grad_norm": 0.702567587332803, "learning_rate": 9.95463634431188e-06, "loss": 0.1823, "step": 92 }, { "epoch": 0.08675373134328358, "grad_norm": 0.6658782349566956, "learning_rate": 9.953646353129626e-06, "loss": 0.1475, "step": 93 }, { "epoch": 0.08768656716417911, "grad_norm": 0.7086460226691392, "learning_rate": 9.952645726026344e-06, "loss": 0.1735, "step": 94 }, { "epoch": 0.08861940298507463, "grad_norm": 0.8022928611629967, "learning_rate": 9.951634465150463e-06, "loss": 0.2148, "step": 95 }, { "epoch": 0.08955223880597014, "grad_norm": 0.6829208286611236, "learning_rate": 9.950612572673255e-06, "loss": 0.1501, "step": 96 }, { "epoch": 0.09048507462686567, "grad_norm": 0.6704234848539978, "learning_rate": 9.949580050788813e-06, "loss": 0.1677, "step": 97 }, { "epoch": 0.0914179104477612, "grad_norm": 0.6479594048539816, "learning_rate": 9.948536901714052e-06, "loss": 0.1454, "step": 98 }, { "epoch": 0.09235074626865672, "grad_norm": 0.7347412293778997, "learning_rate": 9.947483127688708e-06, "loss": 0.1641, "step": 99 }, { "epoch": 0.09328358208955224, "grad_norm": 0.7950578366350712, "learning_rate": 9.946418730975326e-06, "loss": 0.233, "step": 100 }, { "epoch": 0.09421641791044776, "grad_norm": 0.7123746048105567, "learning_rate": 9.945343713859265e-06, "loss": 0.1896, "step": 101 }, { "epoch": 0.09514925373134328, "grad_norm": 0.6474593936898033, "learning_rate": 9.944258078648679e-06, "loss": 0.1377, "step": 102 }, { "epoch": 0.0960820895522388, "grad_norm": 0.7266882954953969, "learning_rate": 9.943161827674524e-06, "loss": 0.2071, "step": 103 }, { "epoch": 0.09701492537313433, "grad_norm": 0.7604483214184947, "learning_rate": 9.942054963290549e-06, "loss": 0.2182, "step": 104 }, { "epoch": 0.09794776119402986, "grad_norm": 0.668326044523115, "learning_rate": 9.940937487873291e-06, "loss": 0.1757, "step": 105 }, { "epoch": 0.09888059701492537, "grad_norm": 0.7048939010169323, "learning_rate": 9.939809403822069e-06, "loss": 0.1873, "step": 106 }, { "epoch": 0.09981343283582089, "grad_norm": 0.7280440356090995, "learning_rate": 9.938670713558983e-06, "loss": 0.1991, "step": 107 }, { "epoch": 0.10074626865671642, "grad_norm": 0.6542922210891479, "learning_rate": 9.9375214195289e-06, "loss": 0.1623, "step": 108 }, { "epoch": 0.10167910447761194, "grad_norm": 0.6617666962913497, "learning_rate": 9.936361524199457e-06, "loss": 0.1789, "step": 109 }, { "epoch": 0.10261194029850747, "grad_norm": 0.6585320728405929, "learning_rate": 9.935191030061052e-06, "loss": 0.1502, "step": 110 }, { "epoch": 0.10354477611940298, "grad_norm": 0.6750553927066317, "learning_rate": 9.934009939626841e-06, "loss": 0.1533, "step": 111 }, { "epoch": 0.1044776119402985, "grad_norm": 0.7057947545021366, "learning_rate": 9.932818255432733e-06, "loss": 0.178, "step": 112 }, { "epoch": 0.10541044776119403, "grad_norm": 0.7204533617244199, "learning_rate": 9.931615980037379e-06, "loss": 0.1634, "step": 113 }, { "epoch": 0.10634328358208955, "grad_norm": 0.6856404991812756, "learning_rate": 9.930403116022169e-06, "loss": 0.1865, "step": 114 }, { "epoch": 0.10727611940298508, "grad_norm": 0.7311429157191179, "learning_rate": 9.929179665991234e-06, "loss": 0.1933, "step": 115 }, { "epoch": 0.10820895522388059, "grad_norm": 0.6629841131103367, "learning_rate": 9.92794563257143e-06, "loss": 0.1631, "step": 116 }, { "epoch": 0.10914179104477612, "grad_norm": 0.7345158405973555, "learning_rate": 9.926701018412335e-06, "loss": 0.17, "step": 117 }, { "epoch": 0.11007462686567164, "grad_norm": 0.7406448672041654, "learning_rate": 9.925445826186246e-06, "loss": 0.2032, "step": 118 }, { "epoch": 0.11100746268656717, "grad_norm": 0.7397645448158385, "learning_rate": 9.924180058588177e-06, "loss": 0.1864, "step": 119 }, { "epoch": 0.11194029850746269, "grad_norm": 0.6972554480852652, "learning_rate": 9.92290371833584e-06, "loss": 0.175, "step": 120 }, { "epoch": 0.11287313432835822, "grad_norm": 0.7271644905410629, "learning_rate": 9.921616808169655e-06, "loss": 0.1862, "step": 121 }, { "epoch": 0.11380597014925373, "grad_norm": 0.6606944310262092, "learning_rate": 9.920319330852729e-06, "loss": 0.1478, "step": 122 }, { "epoch": 0.11473880597014925, "grad_norm": 0.7183256597422383, "learning_rate": 9.919011289170863e-06, "loss": 0.1896, "step": 123 }, { "epoch": 0.11567164179104478, "grad_norm": 0.7699397502863865, "learning_rate": 9.91769268593254e-06, "loss": 0.22, "step": 124 }, { "epoch": 0.1166044776119403, "grad_norm": 0.677110331818146, "learning_rate": 9.91636352396892e-06, "loss": 0.153, "step": 125 }, { "epoch": 0.11753731343283583, "grad_norm": 0.6695744824232048, "learning_rate": 9.915023806133833e-06, "loss": 0.1795, "step": 126 }, { "epoch": 0.11847014925373134, "grad_norm": 0.7074588602255707, "learning_rate": 9.913673535303768e-06, "loss": 0.1985, "step": 127 }, { "epoch": 0.11940298507462686, "grad_norm": 0.7088258461511756, "learning_rate": 9.91231271437788e-06, "loss": 0.1583, "step": 128 }, { "epoch": 0.12033582089552239, "grad_norm": 0.6740450909932992, "learning_rate": 9.910941346277976e-06, "loss": 0.1742, "step": 129 }, { "epoch": 0.12126865671641791, "grad_norm": 0.6557226364648141, "learning_rate": 9.909559433948501e-06, "loss": 0.1534, "step": 130 }, { "epoch": 0.12220149253731344, "grad_norm": 0.6640219469732173, "learning_rate": 9.908166980356548e-06, "loss": 0.1435, "step": 131 }, { "epoch": 0.12313432835820895, "grad_norm": 0.686894662550308, "learning_rate": 9.906763988491834e-06, "loss": 0.1575, "step": 132 }, { "epoch": 0.12406716417910447, "grad_norm": 0.6555566761506486, "learning_rate": 9.905350461366713e-06, "loss": 0.1479, "step": 133 }, { "epoch": 0.125, "grad_norm": 0.6765433437347098, "learning_rate": 9.903926402016153e-06, "loss": 0.1824, "step": 134 }, { "epoch": 0.1259328358208955, "grad_norm": 0.6918863781691122, "learning_rate": 9.902491813497735e-06, "loss": 0.1644, "step": 135 }, { "epoch": 0.12686567164179105, "grad_norm": 0.6457916495356006, "learning_rate": 9.901046698891648e-06, "loss": 0.1514, "step": 136 }, { "epoch": 0.12779850746268656, "grad_norm": 0.679579644074333, "learning_rate": 9.899591061300684e-06, "loss": 0.1648, "step": 137 }, { "epoch": 0.1287313432835821, "grad_norm": 0.6826699490931901, "learning_rate": 9.898124903850228e-06, "loss": 0.1832, "step": 138 }, { "epoch": 0.1296641791044776, "grad_norm": 0.7005997552399583, "learning_rate": 9.896648229688248e-06, "loss": 0.135, "step": 139 }, { "epoch": 0.13059701492537312, "grad_norm": 0.7127168529176046, "learning_rate": 9.895161041985295e-06, "loss": 0.1853, "step": 140 }, { "epoch": 0.13152985074626866, "grad_norm": 0.7341235679146974, "learning_rate": 9.893663343934496e-06, "loss": 0.2121, "step": 141 }, { "epoch": 0.13246268656716417, "grad_norm": 0.7078255234840847, "learning_rate": 9.892155138751542e-06, "loss": 0.1811, "step": 142 }, { "epoch": 0.1333955223880597, "grad_norm": 0.7038563218851055, "learning_rate": 9.890636429674684e-06, "loss": 0.1605, "step": 143 }, { "epoch": 0.13432835820895522, "grad_norm": 0.7164751446694825, "learning_rate": 9.889107219964726e-06, "loss": 0.2073, "step": 144 }, { "epoch": 0.13526119402985073, "grad_norm": 0.6649653874087514, "learning_rate": 9.887567512905019e-06, "loss": 0.1718, "step": 145 }, { "epoch": 0.13619402985074627, "grad_norm": 0.6857124691469317, "learning_rate": 9.886017311801449e-06, "loss": 0.1699, "step": 146 }, { "epoch": 0.13712686567164178, "grad_norm": 0.6468280849741533, "learning_rate": 9.884456619982437e-06, "loss": 0.1478, "step": 147 }, { "epoch": 0.13805970149253732, "grad_norm": 0.7091547669788161, "learning_rate": 9.882885440798928e-06, "loss": 0.1998, "step": 148 }, { "epoch": 0.13899253731343283, "grad_norm": 0.6692750743801303, "learning_rate": 9.881303777624385e-06, "loss": 0.127, "step": 149 }, { "epoch": 0.13992537313432835, "grad_norm": 0.7194603601529795, "learning_rate": 9.879711633854778e-06, "loss": 0.1813, "step": 150 }, { "epoch": 0.14085820895522388, "grad_norm": 0.7181137346161895, "learning_rate": 9.878109012908583e-06, "loss": 0.1843, "step": 151 }, { "epoch": 0.1417910447761194, "grad_norm": 0.7790730703939546, "learning_rate": 9.876495918226772e-06, "loss": 0.1926, "step": 152 }, { "epoch": 0.14272388059701493, "grad_norm": 0.6920980734457731, "learning_rate": 9.8748723532728e-06, "loss": 0.1625, "step": 153 }, { "epoch": 0.14365671641791045, "grad_norm": 0.6667806200911892, "learning_rate": 9.873238321532609e-06, "loss": 0.139, "step": 154 }, { "epoch": 0.14458955223880596, "grad_norm": 0.6911540933613247, "learning_rate": 9.871593826514607e-06, "loss": 0.1644, "step": 155 }, { "epoch": 0.1455223880597015, "grad_norm": 0.6820170032682461, "learning_rate": 9.869938871749676e-06, "loss": 0.1742, "step": 156 }, { "epoch": 0.146455223880597, "grad_norm": 0.705869263053654, "learning_rate": 9.86827346079115e-06, "loss": 0.1831, "step": 157 }, { "epoch": 0.14738805970149255, "grad_norm": 0.7401764298758692, "learning_rate": 9.866597597214815e-06, "loss": 0.1768, "step": 158 }, { "epoch": 0.14832089552238806, "grad_norm": 0.6235783017415548, "learning_rate": 9.864911284618899e-06, "loss": 0.1386, "step": 159 }, { "epoch": 0.14925373134328357, "grad_norm": 0.6889715549977337, "learning_rate": 9.863214526624065e-06, "loss": 0.1566, "step": 160 }, { "epoch": 0.1501865671641791, "grad_norm": 0.6846066974239702, "learning_rate": 9.861507326873407e-06, "loss": 0.1838, "step": 161 }, { "epoch": 0.15111940298507462, "grad_norm": 0.7167198695424138, "learning_rate": 9.859789689032434e-06, "loss": 0.1871, "step": 162 }, { "epoch": 0.15205223880597016, "grad_norm": 0.6973809985750336, "learning_rate": 9.858061616789068e-06, "loss": 0.141, "step": 163 }, { "epoch": 0.15298507462686567, "grad_norm": 0.7128052330624401, "learning_rate": 9.856323113853632e-06, "loss": 0.1677, "step": 164 }, { "epoch": 0.15391791044776118, "grad_norm": 0.7026297259583538, "learning_rate": 9.854574183958849e-06, "loss": 0.1688, "step": 165 }, { "epoch": 0.15485074626865672, "grad_norm": 0.7291157497535105, "learning_rate": 9.852814830859827e-06, "loss": 0.1907, "step": 166 }, { "epoch": 0.15578358208955223, "grad_norm": 0.6223328327121523, "learning_rate": 9.851045058334055e-06, "loss": 0.1336, "step": 167 }, { "epoch": 0.15671641791044777, "grad_norm": 0.6688461143782228, "learning_rate": 9.849264870181393e-06, "loss": 0.1587, "step": 168 }, { "epoch": 0.15764925373134328, "grad_norm": 0.6707956195356253, "learning_rate": 9.847474270224062e-06, "loss": 0.1468, "step": 169 }, { "epoch": 0.15858208955223882, "grad_norm": 0.7068059872520703, "learning_rate": 9.845673262306643e-06, "loss": 0.2034, "step": 170 }, { "epoch": 0.15951492537313433, "grad_norm": 0.6933382505830273, "learning_rate": 9.843861850296058e-06, "loss": 0.1658, "step": 171 }, { "epoch": 0.16044776119402984, "grad_norm": 0.6838688625431699, "learning_rate": 9.842040038081572e-06, "loss": 0.1795, "step": 172 }, { "epoch": 0.16138059701492538, "grad_norm": 0.7126262558674376, "learning_rate": 9.840207829574777e-06, "loss": 0.1879, "step": 173 }, { "epoch": 0.1623134328358209, "grad_norm": 0.657673035404047, "learning_rate": 9.838365228709588e-06, "loss": 0.1386, "step": 174 }, { "epoch": 0.16324626865671643, "grad_norm": 0.6672369720897195, "learning_rate": 9.836512239442237e-06, "loss": 0.1761, "step": 175 }, { "epoch": 0.16417910447761194, "grad_norm": 0.6704756382780347, "learning_rate": 9.834648865751254e-06, "loss": 0.1625, "step": 176 }, { "epoch": 0.16511194029850745, "grad_norm": 0.7327319968062547, "learning_rate": 9.832775111637469e-06, "loss": 0.1671, "step": 177 }, { "epoch": 0.166044776119403, "grad_norm": 0.8466241469824632, "learning_rate": 9.830890981124001e-06, "loss": 0.2112, "step": 178 }, { "epoch": 0.1669776119402985, "grad_norm": 0.6942099737195674, "learning_rate": 9.828996478256246e-06, "loss": 0.1699, "step": 179 }, { "epoch": 0.16791044776119404, "grad_norm": 0.6905533891036305, "learning_rate": 9.827091607101871e-06, "loss": 0.1829, "step": 180 }, { "epoch": 0.16884328358208955, "grad_norm": 0.6840944079490442, "learning_rate": 9.825176371750802e-06, "loss": 0.1635, "step": 181 }, { "epoch": 0.16977611940298507, "grad_norm": 0.7744830187111077, "learning_rate": 9.823250776315223e-06, "loss": 0.214, "step": 182 }, { "epoch": 0.1707089552238806, "grad_norm": 0.6804431296705352, "learning_rate": 9.82131482492956e-06, "loss": 0.1737, "step": 183 }, { "epoch": 0.17164179104477612, "grad_norm": 0.710378752143619, "learning_rate": 9.81936852175047e-06, "loss": 0.1802, "step": 184 }, { "epoch": 0.17257462686567165, "grad_norm": 0.666181384733026, "learning_rate": 9.817411870956843e-06, "loss": 0.1605, "step": 185 }, { "epoch": 0.17350746268656717, "grad_norm": 0.7098491811400729, "learning_rate": 9.81544487674978e-06, "loss": 0.205, "step": 186 }, { "epoch": 0.17444029850746268, "grad_norm": 0.7004713215752281, "learning_rate": 9.813467543352598e-06, "loss": 0.1748, "step": 187 }, { "epoch": 0.17537313432835822, "grad_norm": 0.755426478997948, "learning_rate": 9.811479875010801e-06, "loss": 0.2109, "step": 188 }, { "epoch": 0.17630597014925373, "grad_norm": 0.716720649043939, "learning_rate": 9.809481875992097e-06, "loss": 0.1718, "step": 189 }, { "epoch": 0.17723880597014927, "grad_norm": 0.7524453319708312, "learning_rate": 9.807473550586368e-06, "loss": 0.1842, "step": 190 }, { "epoch": 0.17817164179104478, "grad_norm": 0.6614551234991976, "learning_rate": 9.805454903105663e-06, "loss": 0.1485, "step": 191 }, { "epoch": 0.1791044776119403, "grad_norm": 0.7584728579406068, "learning_rate": 9.803425937884202e-06, "loss": 0.1786, "step": 192 }, { "epoch": 0.18003731343283583, "grad_norm": 0.6980198122484169, "learning_rate": 9.801386659278354e-06, "loss": 0.1766, "step": 193 }, { "epoch": 0.18097014925373134, "grad_norm": 0.7245993872138986, "learning_rate": 9.799337071666632e-06, "loss": 0.1914, "step": 194 }, { "epoch": 0.18190298507462688, "grad_norm": 0.7177336863112267, "learning_rate": 9.797277179449684e-06, "loss": 0.2011, "step": 195 }, { "epoch": 0.1828358208955224, "grad_norm": 0.6156089099363867, "learning_rate": 9.79520698705028e-06, "loss": 0.1411, "step": 196 }, { "epoch": 0.1837686567164179, "grad_norm": 0.646739445919383, "learning_rate": 9.793126498913313e-06, "loss": 0.1784, "step": 197 }, { "epoch": 0.18470149253731344, "grad_norm": 0.6829614551972487, "learning_rate": 9.791035719505773e-06, "loss": 0.1659, "step": 198 }, { "epoch": 0.18563432835820895, "grad_norm": 0.6397186094831334, "learning_rate": 9.788934653316751e-06, "loss": 0.1581, "step": 199 }, { "epoch": 0.1865671641791045, "grad_norm": 0.7026949570869688, "learning_rate": 9.786823304857424e-06, "loss": 0.186, "step": 200 }, { "epoch": 0.1875, "grad_norm": 0.6444778210341632, "learning_rate": 9.784701678661045e-06, "loss": 0.1475, "step": 201 }, { "epoch": 0.1884328358208955, "grad_norm": 0.6561900365409835, "learning_rate": 9.782569779282936e-06, "loss": 0.1298, "step": 202 }, { "epoch": 0.18936567164179105, "grad_norm": 0.7048874217746566, "learning_rate": 9.780427611300474e-06, "loss": 0.197, "step": 203 }, { "epoch": 0.19029850746268656, "grad_norm": 0.6801442112897755, "learning_rate": 9.778275179313084e-06, "loss": 0.1554, "step": 204 }, { "epoch": 0.1912313432835821, "grad_norm": 0.6994538765751637, "learning_rate": 9.776112487942234e-06, "loss": 0.1654, "step": 205 }, { "epoch": 0.1921641791044776, "grad_norm": 0.6246197952944624, "learning_rate": 9.77393954183141e-06, "loss": 0.153, "step": 206 }, { "epoch": 0.19309701492537312, "grad_norm": 0.689440444154527, "learning_rate": 9.771756345646126e-06, "loss": 0.1813, "step": 207 }, { "epoch": 0.19402985074626866, "grad_norm": 0.6952706965379034, "learning_rate": 9.769562904073896e-06, "loss": 0.1624, "step": 208 }, { "epoch": 0.19496268656716417, "grad_norm": 0.7838354432180551, "learning_rate": 9.767359221824236e-06, "loss": 0.2385, "step": 209 }, { "epoch": 0.1958955223880597, "grad_norm": 0.7306245780155641, "learning_rate": 9.765145303628649e-06, "loss": 0.1583, "step": 210 }, { "epoch": 0.19682835820895522, "grad_norm": 0.6295517195800474, "learning_rate": 9.762921154240614e-06, "loss": 0.1519, "step": 211 }, { "epoch": 0.19776119402985073, "grad_norm": 0.7208002458943412, "learning_rate": 9.76068677843558e-06, "loss": 0.1801, "step": 212 }, { "epoch": 0.19869402985074627, "grad_norm": 0.6924562745292359, "learning_rate": 9.75844218101095e-06, "loss": 0.168, "step": 213 }, { "epoch": 0.19962686567164178, "grad_norm": 0.6704776707410871, "learning_rate": 9.756187366786077e-06, "loss": 0.1688, "step": 214 }, { "epoch": 0.20055970149253732, "grad_norm": 0.7148013353700856, "learning_rate": 9.753922340602245e-06, "loss": 0.1691, "step": 215 }, { "epoch": 0.20149253731343283, "grad_norm": 0.6941981500376283, "learning_rate": 9.751647107322668e-06, "loss": 0.1687, "step": 216 }, { "epoch": 0.20242537313432835, "grad_norm": 0.6385235020051775, "learning_rate": 9.749361671832478e-06, "loss": 0.1487, "step": 217 }, { "epoch": 0.20335820895522388, "grad_norm": 0.7173246282752982, "learning_rate": 9.747066039038707e-06, "loss": 0.2056, "step": 218 }, { "epoch": 0.2042910447761194, "grad_norm": 0.6963765663910773, "learning_rate": 9.744760213870286e-06, "loss": 0.1884, "step": 219 }, { "epoch": 0.20522388059701493, "grad_norm": 0.725333622537384, "learning_rate": 9.742444201278022e-06, "loss": 0.1464, "step": 220 }, { "epoch": 0.20615671641791045, "grad_norm": 0.716915064898902, "learning_rate": 9.740118006234607e-06, "loss": 0.1623, "step": 221 }, { "epoch": 0.20708955223880596, "grad_norm": 0.6568690644006672, "learning_rate": 9.737781633734586e-06, "loss": 0.1378, "step": 222 }, { "epoch": 0.2080223880597015, "grad_norm": 0.6168865260353762, "learning_rate": 9.735435088794361e-06, "loss": 0.1494, "step": 223 }, { "epoch": 0.208955223880597, "grad_norm": 0.636672464654377, "learning_rate": 9.733078376452172e-06, "loss": 0.1341, "step": 224 }, { "epoch": 0.20988805970149255, "grad_norm": 0.6652892578334502, "learning_rate": 9.730711501768091e-06, "loss": 0.1703, "step": 225 }, { "epoch": 0.21082089552238806, "grad_norm": 0.6919138601602233, "learning_rate": 9.72833446982401e-06, "loss": 0.1735, "step": 226 }, { "epoch": 0.21175373134328357, "grad_norm": 0.707434737509963, "learning_rate": 9.725947285723629e-06, "loss": 0.1786, "step": 227 }, { "epoch": 0.2126865671641791, "grad_norm": 0.7270879146669134, "learning_rate": 9.723549954592447e-06, "loss": 0.1994, "step": 228 }, { "epoch": 0.21361940298507462, "grad_norm": 0.7048223412953941, "learning_rate": 9.721142481577744e-06, "loss": 0.1787, "step": 229 }, { "epoch": 0.21455223880597016, "grad_norm": 0.6441874883514996, "learning_rate": 9.718724871848581e-06, "loss": 0.1422, "step": 230 }, { "epoch": 0.21548507462686567, "grad_norm": 0.70024706017561, "learning_rate": 9.716297130595784e-06, "loss": 0.1972, "step": 231 }, { "epoch": 0.21641791044776118, "grad_norm": 0.6638404504437856, "learning_rate": 9.713859263031928e-06, "loss": 0.1497, "step": 232 }, { "epoch": 0.21735074626865672, "grad_norm": 0.7246809164561248, "learning_rate": 9.711411274391334e-06, "loss": 0.1928, "step": 233 }, { "epoch": 0.21828358208955223, "grad_norm": 0.6871530157882998, "learning_rate": 9.70895316993005e-06, "loss": 0.1383, "step": 234 }, { "epoch": 0.21921641791044777, "grad_norm": 0.6808116326825683, "learning_rate": 9.706484954925848e-06, "loss": 0.1605, "step": 235 }, { "epoch": 0.22014925373134328, "grad_norm": 0.6589345539108978, "learning_rate": 9.704006634678205e-06, "loss": 0.1792, "step": 236 }, { "epoch": 0.22108208955223882, "grad_norm": 0.6503350202496985, "learning_rate": 9.701518214508295e-06, "loss": 0.1581, "step": 237 }, { "epoch": 0.22201492537313433, "grad_norm": 0.6846072525419546, "learning_rate": 9.69901969975898e-06, "loss": 0.19, "step": 238 }, { "epoch": 0.22294776119402984, "grad_norm": 0.7086126131511166, "learning_rate": 9.696511095794794e-06, "loss": 0.1983, "step": 239 }, { "epoch": 0.22388059701492538, "grad_norm": 0.690321711310152, "learning_rate": 9.693992408001934e-06, "loss": 0.1738, "step": 240 }, { "epoch": 0.2248134328358209, "grad_norm": 0.6639802320272554, "learning_rate": 9.691463641788244e-06, "loss": 0.1764, "step": 241 }, { "epoch": 0.22574626865671643, "grad_norm": 0.6168765207195239, "learning_rate": 9.688924802583215e-06, "loss": 0.1269, "step": 242 }, { "epoch": 0.22667910447761194, "grad_norm": 0.6894522929277352, "learning_rate": 9.68637589583796e-06, "loss": 0.196, "step": 243 }, { "epoch": 0.22761194029850745, "grad_norm": 0.6979271217237258, "learning_rate": 9.683816927025212e-06, "loss": 0.1684, "step": 244 }, { "epoch": 0.228544776119403, "grad_norm": 0.656530541376929, "learning_rate": 9.6812479016393e-06, "loss": 0.15, "step": 245 }, { "epoch": 0.2294776119402985, "grad_norm": 0.6852293458920076, "learning_rate": 9.678668825196155e-06, "loss": 0.1632, "step": 246 }, { "epoch": 0.23041044776119404, "grad_norm": 0.680597913519962, "learning_rate": 9.676079703233283e-06, "loss": 0.1602, "step": 247 }, { "epoch": 0.23134328358208955, "grad_norm": 0.7014870396628629, "learning_rate": 9.673480541309761e-06, "loss": 0.1745, "step": 248 }, { "epoch": 0.23227611940298507, "grad_norm": 0.6851858552975675, "learning_rate": 9.670871345006221e-06, "loss": 0.1441, "step": 249 }, { "epoch": 0.2332089552238806, "grad_norm": 0.695383459036975, "learning_rate": 9.66825211992484e-06, "loss": 0.1679, "step": 250 }, { "epoch": 0.23414179104477612, "grad_norm": 0.6320358311917528, "learning_rate": 9.665622871689329e-06, "loss": 0.1478, "step": 251 }, { "epoch": 0.23507462686567165, "grad_norm": 0.6483243837370887, "learning_rate": 9.662983605944918e-06, "loss": 0.1574, "step": 252 }, { "epoch": 0.23600746268656717, "grad_norm": 0.6784544450366222, "learning_rate": 9.660334328358345e-06, "loss": 0.146, "step": 253 }, { "epoch": 0.23694029850746268, "grad_norm": 0.6853026566763575, "learning_rate": 9.65767504461785e-06, "loss": 0.1812, "step": 254 }, { "epoch": 0.23787313432835822, "grad_norm": 0.6647543593157091, "learning_rate": 9.65500576043315e-06, "loss": 0.1503, "step": 255 }, { "epoch": 0.23880597014925373, "grad_norm": 0.6578325332259585, "learning_rate": 9.652326481535434e-06, "loss": 0.1426, "step": 256 }, { "epoch": 0.23973880597014927, "grad_norm": 0.7098755370475601, "learning_rate": 9.649637213677357e-06, "loss": 0.1754, "step": 257 }, { "epoch": 0.24067164179104478, "grad_norm": 0.7149729182330767, "learning_rate": 9.646937962633014e-06, "loss": 0.2158, "step": 258 }, { "epoch": 0.2416044776119403, "grad_norm": 0.6815246567111986, "learning_rate": 9.64422873419794e-06, "loss": 0.1592, "step": 259 }, { "epoch": 0.24253731343283583, "grad_norm": 0.6782105728902877, "learning_rate": 9.64150953418909e-06, "loss": 0.1717, "step": 260 }, { "epoch": 0.24347014925373134, "grad_norm": 0.6701752797162092, "learning_rate": 9.63878036844483e-06, "loss": 0.1499, "step": 261 }, { "epoch": 0.24440298507462688, "grad_norm": 0.6519126366687685, "learning_rate": 9.636041242824921e-06, "loss": 0.147, "step": 262 }, { "epoch": 0.2453358208955224, "grad_norm": 0.7317817740526421, "learning_rate": 9.63329216321051e-06, "loss": 0.1774, "step": 263 }, { "epoch": 0.2462686567164179, "grad_norm": 0.7271479836474176, "learning_rate": 9.630533135504118e-06, "loss": 0.1875, "step": 264 }, { "epoch": 0.24720149253731344, "grad_norm": 0.7055747630575457, "learning_rate": 9.627764165629623e-06, "loss": 0.1602, "step": 265 }, { "epoch": 0.24813432835820895, "grad_norm": 0.6951004755224613, "learning_rate": 9.624985259532251e-06, "loss": 0.1709, "step": 266 }, { "epoch": 0.2490671641791045, "grad_norm": 0.7361556566885852, "learning_rate": 9.622196423178562e-06, "loss": 0.2022, "step": 267 }, { "epoch": 0.25, "grad_norm": 0.7064672801621328, "learning_rate": 9.619397662556434e-06, "loss": 0.1746, "step": 268 }, { "epoch": 0.25093283582089554, "grad_norm": 0.6920299408496513, "learning_rate": 9.61658898367506e-06, "loss": 0.1403, "step": 269 }, { "epoch": 0.251865671641791, "grad_norm": 0.6825328444947351, "learning_rate": 9.613770392564921e-06, "loss": 0.1498, "step": 270 }, { "epoch": 0.25279850746268656, "grad_norm": 0.7581744618465716, "learning_rate": 9.610941895277784e-06, "loss": 0.206, "step": 271 }, { "epoch": 0.2537313432835821, "grad_norm": 0.7367453505626524, "learning_rate": 9.608103497886687e-06, "loss": 0.1989, "step": 272 }, { "epoch": 0.25466417910447764, "grad_norm": 0.6917743086773166, "learning_rate": 9.605255206485922e-06, "loss": 0.1886, "step": 273 }, { "epoch": 0.2555970149253731, "grad_norm": 0.6287555846617786, "learning_rate": 9.602397027191026e-06, "loss": 0.1376, "step": 274 }, { "epoch": 0.25652985074626866, "grad_norm": 0.7039493223438446, "learning_rate": 9.599528966138763e-06, "loss": 0.1938, "step": 275 }, { "epoch": 0.2574626865671642, "grad_norm": 0.6385275232186186, "learning_rate": 9.596651029487116e-06, "loss": 0.1552, "step": 276 }, { "epoch": 0.2583955223880597, "grad_norm": 0.672738387210679, "learning_rate": 9.593763223415275e-06, "loss": 0.1745, "step": 277 }, { "epoch": 0.2593283582089552, "grad_norm": 0.6539912607833361, "learning_rate": 9.590865554123614e-06, "loss": 0.1491, "step": 278 }, { "epoch": 0.26026119402985076, "grad_norm": 0.7060261006749092, "learning_rate": 9.587958027833691e-06, "loss": 0.1593, "step": 279 }, { "epoch": 0.26119402985074625, "grad_norm": 0.7202533037579819, "learning_rate": 9.585040650788222e-06, "loss": 0.188, "step": 280 }, { "epoch": 0.2621268656716418, "grad_norm": 0.6414924399163524, "learning_rate": 9.582113429251076e-06, "loss": 0.1333, "step": 281 }, { "epoch": 0.2630597014925373, "grad_norm": 0.6715934510315527, "learning_rate": 9.579176369507262e-06, "loss": 0.1435, "step": 282 }, { "epoch": 0.26399253731343286, "grad_norm": 0.682454424191338, "learning_rate": 9.576229477862905e-06, "loss": 0.1633, "step": 283 }, { "epoch": 0.26492537313432835, "grad_norm": 0.6659390170983546, "learning_rate": 9.573272760645248e-06, "loss": 0.1712, "step": 284 }, { "epoch": 0.2658582089552239, "grad_norm": 0.777977820292407, "learning_rate": 9.570306224202625e-06, "loss": 0.2222, "step": 285 }, { "epoch": 0.2667910447761194, "grad_norm": 0.674595718637446, "learning_rate": 9.567329874904456e-06, "loss": 0.1631, "step": 286 }, { "epoch": 0.2677238805970149, "grad_norm": 0.7181774398646773, "learning_rate": 9.56434371914123e-06, "loss": 0.1804, "step": 287 }, { "epoch": 0.26865671641791045, "grad_norm": 0.6946142410862716, "learning_rate": 9.561347763324484e-06, "loss": 0.1927, "step": 288 }, { "epoch": 0.269589552238806, "grad_norm": 0.6013136127213021, "learning_rate": 9.55834201388681e-06, "loss": 0.1453, "step": 289 }, { "epoch": 0.27052238805970147, "grad_norm": 0.6591065486485034, "learning_rate": 9.555326477281816e-06, "loss": 0.1678, "step": 290 }, { "epoch": 0.271455223880597, "grad_norm": 0.6480450068693724, "learning_rate": 9.55230115998413e-06, "loss": 0.1444, "step": 291 }, { "epoch": 0.27238805970149255, "grad_norm": 0.6933230293223538, "learning_rate": 9.549266068489377e-06, "loss": 0.1361, "step": 292 }, { "epoch": 0.2733208955223881, "grad_norm": 0.7085463902937235, "learning_rate": 9.546221209314172e-06, "loss": 0.1803, "step": 293 }, { "epoch": 0.27425373134328357, "grad_norm": 0.7107877136425425, "learning_rate": 9.543166588996095e-06, "loss": 0.1774, "step": 294 }, { "epoch": 0.2751865671641791, "grad_norm": 0.7879289179127623, "learning_rate": 9.540102214093696e-06, "loss": 0.2269, "step": 295 }, { "epoch": 0.27611940298507465, "grad_norm": 0.6477990840577625, "learning_rate": 9.537028091186453e-06, "loss": 0.1483, "step": 296 }, { "epoch": 0.27705223880597013, "grad_norm": 0.712951717853517, "learning_rate": 9.533944226874787e-06, "loss": 0.1705, "step": 297 }, { "epoch": 0.27798507462686567, "grad_norm": 0.663150142934282, "learning_rate": 9.530850627780031e-06, "loss": 0.1636, "step": 298 }, { "epoch": 0.2789179104477612, "grad_norm": 0.6705160877657834, "learning_rate": 9.527747300544417e-06, "loss": 0.1718, "step": 299 }, { "epoch": 0.2798507462686567, "grad_norm": 0.64835912213447, "learning_rate": 9.524634251831064e-06, "loss": 0.1511, "step": 300 }, { "epoch": 0.28078358208955223, "grad_norm": 0.640381717830191, "learning_rate": 9.521511488323968e-06, "loss": 0.1558, "step": 301 }, { "epoch": 0.28171641791044777, "grad_norm": 0.7178707708096552, "learning_rate": 9.518379016727979e-06, "loss": 0.1759, "step": 302 }, { "epoch": 0.2826492537313433, "grad_norm": 0.697469201493512, "learning_rate": 9.515236843768796e-06, "loss": 0.1751, "step": 303 }, { "epoch": 0.2835820895522388, "grad_norm": 0.6125281101585521, "learning_rate": 9.512084976192944e-06, "loss": 0.125, "step": 304 }, { "epoch": 0.28451492537313433, "grad_norm": 0.6423382185214507, "learning_rate": 9.508923420767761e-06, "loss": 0.1752, "step": 305 }, { "epoch": 0.28544776119402987, "grad_norm": 0.6833757054642323, "learning_rate": 9.505752184281391e-06, "loss": 0.185, "step": 306 }, { "epoch": 0.28638059701492535, "grad_norm": 0.649588614044519, "learning_rate": 9.502571273542765e-06, "loss": 0.1325, "step": 307 }, { "epoch": 0.2873134328358209, "grad_norm": 0.676946737677583, "learning_rate": 9.499380695381577e-06, "loss": 0.1507, "step": 308 }, { "epoch": 0.28824626865671643, "grad_norm": 0.6591010301176943, "learning_rate": 9.496180456648287e-06, "loss": 0.1401, "step": 309 }, { "epoch": 0.2891791044776119, "grad_norm": 0.7486623200408136, "learning_rate": 9.492970564214093e-06, "loss": 0.1955, "step": 310 }, { "epoch": 0.29011194029850745, "grad_norm": 0.6391524003446851, "learning_rate": 9.489751024970917e-06, "loss": 0.1634, "step": 311 }, { "epoch": 0.291044776119403, "grad_norm": 0.646949717837791, "learning_rate": 9.486521845831403e-06, "loss": 0.1454, "step": 312 }, { "epoch": 0.29197761194029853, "grad_norm": 0.6745927493200208, "learning_rate": 9.48328303372888e-06, "loss": 0.1726, "step": 313 }, { "epoch": 0.292910447761194, "grad_norm": 0.7377814594304649, "learning_rate": 9.480034595617374e-06, "loss": 0.1711, "step": 314 }, { "epoch": 0.29384328358208955, "grad_norm": 0.6795031449731064, "learning_rate": 9.476776538471567e-06, "loss": 0.1423, "step": 315 }, { "epoch": 0.2947761194029851, "grad_norm": 0.7404895762824814, "learning_rate": 9.4735088692868e-06, "loss": 0.2126, "step": 316 }, { "epoch": 0.2957089552238806, "grad_norm": 0.6983632775643599, "learning_rate": 9.470231595079051e-06, "loss": 0.1662, "step": 317 }, { "epoch": 0.2966417910447761, "grad_norm": 0.6366840899984147, "learning_rate": 9.466944722884918e-06, "loss": 0.1313, "step": 318 }, { "epoch": 0.29757462686567165, "grad_norm": 0.6364871084769171, "learning_rate": 9.463648259761613e-06, "loss": 0.1486, "step": 319 }, { "epoch": 0.29850746268656714, "grad_norm": 0.6311529040101592, "learning_rate": 9.460342212786933e-06, "loss": 0.1317, "step": 320 }, { "epoch": 0.2994402985074627, "grad_norm": 0.683684337012126, "learning_rate": 9.45702658905926e-06, "loss": 0.1628, "step": 321 }, { "epoch": 0.3003731343283582, "grad_norm": 0.6813390846035873, "learning_rate": 9.453701395697528e-06, "loss": 0.1806, "step": 322 }, { "epoch": 0.30130597014925375, "grad_norm": 0.7169013427925693, "learning_rate": 9.450366639841232e-06, "loss": 0.2021, "step": 323 }, { "epoch": 0.30223880597014924, "grad_norm": 0.6962508869583867, "learning_rate": 9.447022328650382e-06, "loss": 0.1727, "step": 324 }, { "epoch": 0.3031716417910448, "grad_norm": 0.7250571045077082, "learning_rate": 9.44366846930552e-06, "loss": 0.1812, "step": 325 }, { "epoch": 0.3041044776119403, "grad_norm": 0.662672206289539, "learning_rate": 9.440305069007678e-06, "loss": 0.1685, "step": 326 }, { "epoch": 0.3050373134328358, "grad_norm": 0.7162357934091524, "learning_rate": 9.436932134978378e-06, "loss": 0.1851, "step": 327 }, { "epoch": 0.30597014925373134, "grad_norm": 0.7138990542768859, "learning_rate": 9.43354967445961e-06, "loss": 0.1793, "step": 328 }, { "epoch": 0.3069029850746269, "grad_norm": 0.6311990952989243, "learning_rate": 9.430157694713817e-06, "loss": 0.1613, "step": 329 }, { "epoch": 0.30783582089552236, "grad_norm": 0.6005912478010195, "learning_rate": 9.426756203023886e-06, "loss": 0.1324, "step": 330 }, { "epoch": 0.3087686567164179, "grad_norm": 0.678722888581518, "learning_rate": 9.42334520669312e-06, "loss": 0.1401, "step": 331 }, { "epoch": 0.30970149253731344, "grad_norm": 0.6927261160838745, "learning_rate": 9.419924713045234e-06, "loss": 0.1603, "step": 332 }, { "epoch": 0.310634328358209, "grad_norm": 0.7264349748262426, "learning_rate": 9.416494729424334e-06, "loss": 0.178, "step": 333 }, { "epoch": 0.31156716417910446, "grad_norm": 0.6415813283592631, "learning_rate": 9.413055263194902e-06, "loss": 0.1425, "step": 334 }, { "epoch": 0.3125, "grad_norm": 0.7112637461656635, "learning_rate": 9.409606321741776e-06, "loss": 0.1801, "step": 335 }, { "epoch": 0.31343283582089554, "grad_norm": 0.6725534964977484, "learning_rate": 9.406147912470142e-06, "loss": 0.1577, "step": 336 }, { "epoch": 0.314365671641791, "grad_norm": 0.6585949525233161, "learning_rate": 9.402680042805517e-06, "loss": 0.1336, "step": 337 }, { "epoch": 0.31529850746268656, "grad_norm": 0.7194796954436806, "learning_rate": 9.399202720193723e-06, "loss": 0.1847, "step": 338 }, { "epoch": 0.3162313432835821, "grad_norm": 0.6495295211768262, "learning_rate": 9.395715952100882e-06, "loss": 0.1652, "step": 339 }, { "epoch": 0.31716417910447764, "grad_norm": 0.6696468591105181, "learning_rate": 9.392219746013399e-06, "loss": 0.1403, "step": 340 }, { "epoch": 0.3180970149253731, "grad_norm": 0.6444717373521825, "learning_rate": 9.38871410943794e-06, "loss": 0.1662, "step": 341 }, { "epoch": 0.31902985074626866, "grad_norm": 0.6335752329207017, "learning_rate": 9.385199049901418e-06, "loss": 0.154, "step": 342 }, { "epoch": 0.3199626865671642, "grad_norm": 0.7519583658524612, "learning_rate": 9.381674574950981e-06, "loss": 0.2049, "step": 343 }, { "epoch": 0.3208955223880597, "grad_norm": 0.6252862692780166, "learning_rate": 9.378140692153991e-06, "loss": 0.1532, "step": 344 }, { "epoch": 0.3218283582089552, "grad_norm": 0.7133514415786062, "learning_rate": 9.374597409098011e-06, "loss": 0.2083, "step": 345 }, { "epoch": 0.32276119402985076, "grad_norm": 0.6430444631837913, "learning_rate": 9.371044733390786e-06, "loss": 0.1506, "step": 346 }, { "epoch": 0.32369402985074625, "grad_norm": 0.6272764208466327, "learning_rate": 9.367482672660226e-06, "loss": 0.1593, "step": 347 }, { "epoch": 0.3246268656716418, "grad_norm": 0.6421925473109799, "learning_rate": 9.363911234554394e-06, "loss": 0.1468, "step": 348 }, { "epoch": 0.3255597014925373, "grad_norm": 0.6428164852500166, "learning_rate": 9.360330426741488e-06, "loss": 0.1696, "step": 349 }, { "epoch": 0.32649253731343286, "grad_norm": 0.6153969329414786, "learning_rate": 9.356740256909822e-06, "loss": 0.1374, "step": 350 }, { "epoch": 0.32742537313432835, "grad_norm": 0.6797695263573942, "learning_rate": 9.353140732767811e-06, "loss": 0.175, "step": 351 }, { "epoch": 0.3283582089552239, "grad_norm": 0.677421939830271, "learning_rate": 9.349531862043952e-06, "loss": 0.1677, "step": 352 }, { "epoch": 0.3292910447761194, "grad_norm": 0.6582520470211229, "learning_rate": 9.345913652486815e-06, "loss": 0.1498, "step": 353 }, { "epoch": 0.3302238805970149, "grad_norm": 0.6429783646536413, "learning_rate": 9.342286111865023e-06, "loss": 0.1646, "step": 354 }, { "epoch": 0.33115671641791045, "grad_norm": 0.6629425225330308, "learning_rate": 9.338649247967221e-06, "loss": 0.171, "step": 355 }, { "epoch": 0.332089552238806, "grad_norm": 0.745802679262241, "learning_rate": 9.335003068602087e-06, "loss": 0.1883, "step": 356 }, { "epoch": 0.33302238805970147, "grad_norm": 0.6444876037184774, "learning_rate": 9.33134758159829e-06, "loss": 0.1592, "step": 357 }, { "epoch": 0.333955223880597, "grad_norm": 0.558860623381031, "learning_rate": 9.32768279480449e-06, "loss": 0.1148, "step": 358 }, { "epoch": 0.33488805970149255, "grad_norm": 0.722828571356857, "learning_rate": 9.32400871608931e-06, "loss": 0.1601, "step": 359 }, { "epoch": 0.3358208955223881, "grad_norm": 0.6959835349163733, "learning_rate": 9.320325353341325e-06, "loss": 0.1657, "step": 360 }, { "epoch": 0.33675373134328357, "grad_norm": 0.6675843148695296, "learning_rate": 9.316632714469044e-06, "loss": 0.1583, "step": 361 }, { "epoch": 0.3376865671641791, "grad_norm": 0.7456343942497266, "learning_rate": 9.312930807400893e-06, "loss": 0.1844, "step": 362 }, { "epoch": 0.33861940298507465, "grad_norm": 0.634148409257968, "learning_rate": 9.309219640085196e-06, "loss": 0.1383, "step": 363 }, { "epoch": 0.33955223880597013, "grad_norm": 0.6609120465451035, "learning_rate": 9.305499220490162e-06, "loss": 0.158, "step": 364 }, { "epoch": 0.34048507462686567, "grad_norm": 0.7459692021830586, "learning_rate": 9.30176955660386e-06, "loss": 0.1996, "step": 365 }, { "epoch": 0.3414179104477612, "grad_norm": 0.7200896108086582, "learning_rate": 9.298030656434215e-06, "loss": 0.1772, "step": 366 }, { "epoch": 0.3423507462686567, "grad_norm": 0.7126002849648378, "learning_rate": 9.294282528008981e-06, "loss": 0.1669, "step": 367 }, { "epoch": 0.34328358208955223, "grad_norm": 0.603130537938871, "learning_rate": 9.290525179375722e-06, "loss": 0.1179, "step": 368 }, { "epoch": 0.34421641791044777, "grad_norm": 0.6339355522829767, "learning_rate": 9.286758618601801e-06, "loss": 0.1451, "step": 369 }, { "epoch": 0.3451492537313433, "grad_norm": 0.693034227384758, "learning_rate": 9.28298285377436e-06, "loss": 0.1778, "step": 370 }, { "epoch": 0.3460820895522388, "grad_norm": 0.7059093833369595, "learning_rate": 9.279197893000305e-06, "loss": 0.1558, "step": 371 }, { "epoch": 0.34701492537313433, "grad_norm": 0.7347098578783331, "learning_rate": 9.275403744406282e-06, "loss": 0.1685, "step": 372 }, { "epoch": 0.34794776119402987, "grad_norm": 0.6307457341287032, "learning_rate": 9.271600416138669e-06, "loss": 0.1562, "step": 373 }, { "epoch": 0.34888059701492535, "grad_norm": 0.6388488662739861, "learning_rate": 9.26778791636355e-06, "loss": 0.1628, "step": 374 }, { "epoch": 0.3498134328358209, "grad_norm": 0.6770717518204264, "learning_rate": 9.263966253266705e-06, "loss": 0.1726, "step": 375 }, { "epoch": 0.35074626865671643, "grad_norm": 0.7047264664833355, "learning_rate": 9.260135435053583e-06, "loss": 0.1609, "step": 376 }, { "epoch": 0.3516791044776119, "grad_norm": 0.7015343437255913, "learning_rate": 9.256295469949295e-06, "loss": 0.1754, "step": 377 }, { "epoch": 0.35261194029850745, "grad_norm": 0.6155310397952164, "learning_rate": 9.252446366198586e-06, "loss": 0.138, "step": 378 }, { "epoch": 0.353544776119403, "grad_norm": 0.6936010487253714, "learning_rate": 9.248588132065828e-06, "loss": 0.1825, "step": 379 }, { "epoch": 0.35447761194029853, "grad_norm": 0.6780588889510007, "learning_rate": 9.244720775834993e-06, "loss": 0.1643, "step": 380 }, { "epoch": 0.355410447761194, "grad_norm": 0.6828468946762515, "learning_rate": 9.240844305809641e-06, "loss": 0.1722, "step": 381 }, { "epoch": 0.35634328358208955, "grad_norm": 0.6742516523241912, "learning_rate": 9.2369587303129e-06, "loss": 0.1562, "step": 382 }, { "epoch": 0.3572761194029851, "grad_norm": 0.6801959261831995, "learning_rate": 9.233064057687444e-06, "loss": 0.15, "step": 383 }, { "epoch": 0.3582089552238806, "grad_norm": 0.6909220279433498, "learning_rate": 9.229160296295488e-06, "loss": 0.1633, "step": 384 }, { "epoch": 0.3591417910447761, "grad_norm": 0.6437202150674314, "learning_rate": 9.225247454518752e-06, "loss": 0.1395, "step": 385 }, { "epoch": 0.36007462686567165, "grad_norm": 0.6441165962705107, "learning_rate": 9.221325540758459e-06, "loss": 0.1424, "step": 386 }, { "epoch": 0.36100746268656714, "grad_norm": 0.6793822775874866, "learning_rate": 9.217394563435306e-06, "loss": 0.1868, "step": 387 }, { "epoch": 0.3619402985074627, "grad_norm": 0.6148985335721807, "learning_rate": 9.213454530989454e-06, "loss": 0.1323, "step": 388 }, { "epoch": 0.3628731343283582, "grad_norm": 0.6692283342051085, "learning_rate": 9.209505451880504e-06, "loss": 0.1449, "step": 389 }, { "epoch": 0.36380597014925375, "grad_norm": 0.6840955653826101, "learning_rate": 9.205547334587483e-06, "loss": 0.1741, "step": 390 }, { "epoch": 0.36473880597014924, "grad_norm": 0.6147344917947238, "learning_rate": 9.201580187608818e-06, "loss": 0.1475, "step": 391 }, { "epoch": 0.3656716417910448, "grad_norm": 0.6708829450895963, "learning_rate": 9.197604019462334e-06, "loss": 0.1491, "step": 392 }, { "epoch": 0.3666044776119403, "grad_norm": 0.6378069932447783, "learning_rate": 9.193618838685213e-06, "loss": 0.143, "step": 393 }, { "epoch": 0.3675373134328358, "grad_norm": 0.6415348588146481, "learning_rate": 9.189624653833997e-06, "loss": 0.1519, "step": 394 }, { "epoch": 0.36847014925373134, "grad_norm": 0.6864021681916475, "learning_rate": 9.185621473484558e-06, "loss": 0.1816, "step": 395 }, { "epoch": 0.3694029850746269, "grad_norm": 0.651137843997707, "learning_rate": 9.18160930623208e-06, "loss": 0.1535, "step": 396 }, { "epoch": 0.37033582089552236, "grad_norm": 0.6551251559285896, "learning_rate": 9.177588160691044e-06, "loss": 0.1514, "step": 397 }, { "epoch": 0.3712686567164179, "grad_norm": 0.6457463842025614, "learning_rate": 9.173558045495212e-06, "loss": 0.1378, "step": 398 }, { "epoch": 0.37220149253731344, "grad_norm": 0.7022061823428762, "learning_rate": 9.169518969297598e-06, "loss": 0.1866, "step": 399 }, { "epoch": 0.373134328358209, "grad_norm": 0.6204702707955785, "learning_rate": 9.165470940770458e-06, "loss": 0.1518, "step": 400 }, { "epoch": 0.37406716417910446, "grad_norm": 0.6721167862679555, "learning_rate": 9.161413968605274e-06, "loss": 0.1689, "step": 401 }, { "epoch": 0.375, "grad_norm": 0.6687160120326161, "learning_rate": 9.157348061512728e-06, "loss": 0.1692, "step": 402 }, { "epoch": 0.37593283582089554, "grad_norm": 0.6099209894752342, "learning_rate": 9.15327322822268e-06, "loss": 0.1339, "step": 403 }, { "epoch": 0.376865671641791, "grad_norm": 0.6173543561075466, "learning_rate": 9.149189477484169e-06, "loss": 0.149, "step": 404 }, { "epoch": 0.37779850746268656, "grad_norm": 0.6169917970739728, "learning_rate": 9.145096818065365e-06, "loss": 0.1642, "step": 405 }, { "epoch": 0.3787313432835821, "grad_norm": 0.6806793351183142, "learning_rate": 9.140995258753577e-06, "loss": 0.1802, "step": 406 }, { "epoch": 0.37966417910447764, "grad_norm": 0.7574219161776983, "learning_rate": 9.136884808355219e-06, "loss": 0.2155, "step": 407 }, { "epoch": 0.3805970149253731, "grad_norm": 0.6851282227990966, "learning_rate": 9.132765475695795e-06, "loss": 0.154, "step": 408 }, { "epoch": 0.38152985074626866, "grad_norm": 0.6777633709270767, "learning_rate": 9.128637269619878e-06, "loss": 0.1599, "step": 409 }, { "epoch": 0.3824626865671642, "grad_norm": 0.6525135016414808, "learning_rate": 9.124500198991098e-06, "loss": 0.1552, "step": 410 }, { "epoch": 0.3833955223880597, "grad_norm": 0.6512927863650952, "learning_rate": 9.12035427269211e-06, "loss": 0.1467, "step": 411 }, { "epoch": 0.3843283582089552, "grad_norm": 0.667064075573666, "learning_rate": 9.116199499624596e-06, "loss": 0.1744, "step": 412 }, { "epoch": 0.38526119402985076, "grad_norm": 0.6613753100929451, "learning_rate": 9.112035888709219e-06, "loss": 0.1686, "step": 413 }, { "epoch": 0.38619402985074625, "grad_norm": 0.6412099590266713, "learning_rate": 9.107863448885624e-06, "loss": 0.1343, "step": 414 }, { "epoch": 0.3871268656716418, "grad_norm": 0.6604439261157088, "learning_rate": 9.103682189112413e-06, "loss": 0.1656, "step": 415 }, { "epoch": 0.3880597014925373, "grad_norm": 0.6115411249084648, "learning_rate": 9.099492118367123e-06, "loss": 0.1353, "step": 416 }, { "epoch": 0.38899253731343286, "grad_norm": 0.7002334911718856, "learning_rate": 9.095293245646212e-06, "loss": 0.1593, "step": 417 }, { "epoch": 0.38992537313432835, "grad_norm": 0.7323496180498393, "learning_rate": 9.091085579965034e-06, "loss": 0.2081, "step": 418 }, { "epoch": 0.3908582089552239, "grad_norm": 0.7320096978224062, "learning_rate": 9.08686913035782e-06, "loss": 0.1934, "step": 419 }, { "epoch": 0.3917910447761194, "grad_norm": 0.6529700530520359, "learning_rate": 9.082643905877665e-06, "loss": 0.131, "step": 420 }, { "epoch": 0.3927238805970149, "grad_norm": 0.6803243613594315, "learning_rate": 9.078409915596506e-06, "loss": 0.1618, "step": 421 }, { "epoch": 0.39365671641791045, "grad_norm": 0.6928619139860963, "learning_rate": 9.074167168605096e-06, "loss": 0.1442, "step": 422 }, { "epoch": 0.394589552238806, "grad_norm": 0.6194662213662936, "learning_rate": 9.069915674012995e-06, "loss": 0.1144, "step": 423 }, { "epoch": 0.39552238805970147, "grad_norm": 0.6915655760799579, "learning_rate": 9.065655440948536e-06, "loss": 0.182, "step": 424 }, { "epoch": 0.396455223880597, "grad_norm": 0.6604648324960356, "learning_rate": 9.061386478558822e-06, "loss": 0.1627, "step": 425 }, { "epoch": 0.39738805970149255, "grad_norm": 0.7111090172624033, "learning_rate": 9.057108796009697e-06, "loss": 0.1886, "step": 426 }, { "epoch": 0.3983208955223881, "grad_norm": 0.6283101109522261, "learning_rate": 9.052822402485727e-06, "loss": 0.1347, "step": 427 }, { "epoch": 0.39925373134328357, "grad_norm": 0.6149057702176511, "learning_rate": 9.048527307190182e-06, "loss": 0.157, "step": 428 }, { "epoch": 0.4001865671641791, "grad_norm": 0.6504541640622249, "learning_rate": 9.044223519345016e-06, "loss": 0.1404, "step": 429 }, { "epoch": 0.40111940298507465, "grad_norm": 0.6859771643626653, "learning_rate": 9.039911048190843e-06, "loss": 0.1576, "step": 430 }, { "epoch": 0.40205223880597013, "grad_norm": 0.6208700024957391, "learning_rate": 9.035589902986928e-06, "loss": 0.1443, "step": 431 }, { "epoch": 0.40298507462686567, "grad_norm": 0.6377238730820449, "learning_rate": 9.03126009301115e-06, "loss": 0.1619, "step": 432 }, { "epoch": 0.4039179104477612, "grad_norm": 0.6433366697154884, "learning_rate": 9.026921627560001e-06, "loss": 0.1589, "step": 433 }, { "epoch": 0.4048507462686567, "grad_norm": 0.6862713356025116, "learning_rate": 9.022574515948554e-06, "loss": 0.181, "step": 434 }, { "epoch": 0.40578358208955223, "grad_norm": 0.6237341406461523, "learning_rate": 9.018218767510445e-06, "loss": 0.1542, "step": 435 }, { "epoch": 0.40671641791044777, "grad_norm": 0.7285409745624158, "learning_rate": 9.013854391597856e-06, "loss": 0.2043, "step": 436 }, { "epoch": 0.4076492537313433, "grad_norm": 0.6521616610233495, "learning_rate": 9.009481397581489e-06, "loss": 0.1289, "step": 437 }, { "epoch": 0.4085820895522388, "grad_norm": 0.668194268106565, "learning_rate": 9.005099794850554e-06, "loss": 0.1683, "step": 438 }, { "epoch": 0.40951492537313433, "grad_norm": 0.6677737862555768, "learning_rate": 9.000709592812743e-06, "loss": 0.1632, "step": 439 }, { "epoch": 0.41044776119402987, "grad_norm": 0.684726002204508, "learning_rate": 8.996310800894215e-06, "loss": 0.1712, "step": 440 }, { "epoch": 0.41138059701492535, "grad_norm": 0.6647034721483477, "learning_rate": 8.991903428539566e-06, "loss": 0.1764, "step": 441 }, { "epoch": 0.4123134328358209, "grad_norm": 0.6711272299650977, "learning_rate": 8.987487485211817e-06, "loss": 0.1455, "step": 442 }, { "epoch": 0.41324626865671643, "grad_norm": 0.6673129436984016, "learning_rate": 8.983062980392394e-06, "loss": 0.1578, "step": 443 }, { "epoch": 0.4141791044776119, "grad_norm": 0.649700431651383, "learning_rate": 8.978629923581104e-06, "loss": 0.1581, "step": 444 }, { "epoch": 0.41511194029850745, "grad_norm": 0.6478253515057547, "learning_rate": 8.974188324296115e-06, "loss": 0.1481, "step": 445 }, { "epoch": 0.416044776119403, "grad_norm": 0.6896276783957347, "learning_rate": 8.969738192073939e-06, "loss": 0.1766, "step": 446 }, { "epoch": 0.41697761194029853, "grad_norm": 0.6542330864311697, "learning_rate": 8.965279536469406e-06, "loss": 0.1296, "step": 447 }, { "epoch": 0.417910447761194, "grad_norm": 0.7765852630841193, "learning_rate": 8.960812367055646e-06, "loss": 0.2072, "step": 448 }, { "epoch": 0.41884328358208955, "grad_norm": 0.6743674657148047, "learning_rate": 8.956336693424076e-06, "loss": 0.1964, "step": 449 }, { "epoch": 0.4197761194029851, "grad_norm": 0.6159344659481367, "learning_rate": 8.951852525184361e-06, "loss": 0.1484, "step": 450 }, { "epoch": 0.4207089552238806, "grad_norm": 0.6771461921925988, "learning_rate": 8.947359871964415e-06, "loss": 0.1922, "step": 451 }, { "epoch": 0.4216417910447761, "grad_norm": 0.6449301509119331, "learning_rate": 8.94285874341036e-06, "loss": 0.1692, "step": 452 }, { "epoch": 0.42257462686567165, "grad_norm": 0.6340129763370035, "learning_rate": 8.938349149186527e-06, "loss": 0.1537, "step": 453 }, { "epoch": 0.42350746268656714, "grad_norm": 0.7282568501721791, "learning_rate": 8.933831098975416e-06, "loss": 0.195, "step": 454 }, { "epoch": 0.4244402985074627, "grad_norm": 0.6606552894147141, "learning_rate": 8.929304602477681e-06, "loss": 0.1762, "step": 455 }, { "epoch": 0.4253731343283582, "grad_norm": 0.6264539697098969, "learning_rate": 8.924769669412117e-06, "loss": 0.1609, "step": 456 }, { "epoch": 0.42630597014925375, "grad_norm": 0.6185134018306179, "learning_rate": 8.92022630951563e-06, "loss": 0.1509, "step": 457 }, { "epoch": 0.42723880597014924, "grad_norm": 0.6289765507169651, "learning_rate": 8.915674532543218e-06, "loss": 0.1551, "step": 458 }, { "epoch": 0.4281716417910448, "grad_norm": 0.6402687092511271, "learning_rate": 8.911114348267954e-06, "loss": 0.1593, "step": 459 }, { "epoch": 0.4291044776119403, "grad_norm": 0.6150721447464502, "learning_rate": 8.906545766480961e-06, "loss": 0.1495, "step": 460 }, { "epoch": 0.4300373134328358, "grad_norm": 0.6088571427706689, "learning_rate": 8.90196879699139e-06, "loss": 0.1372, "step": 461 }, { "epoch": 0.43097014925373134, "grad_norm": 0.7567087478126865, "learning_rate": 8.897383449626407e-06, "loss": 0.1865, "step": 462 }, { "epoch": 0.4319029850746269, "grad_norm": 0.664044295254781, "learning_rate": 8.892789734231158e-06, "loss": 0.1807, "step": 463 }, { "epoch": 0.43283582089552236, "grad_norm": 0.6627026532414828, "learning_rate": 8.888187660668762e-06, "loss": 0.1706, "step": 464 }, { "epoch": 0.4337686567164179, "grad_norm": 0.6958848417322209, "learning_rate": 8.88357723882028e-06, "loss": 0.1678, "step": 465 }, { "epoch": 0.43470149253731344, "grad_norm": 0.6294431568029407, "learning_rate": 8.878958478584702e-06, "loss": 0.1499, "step": 466 }, { "epoch": 0.435634328358209, "grad_norm": 0.6843226495579973, "learning_rate": 8.87433138987892e-06, "loss": 0.1727, "step": 467 }, { "epoch": 0.43656716417910446, "grad_norm": 0.6187079099244909, "learning_rate": 8.869695982637703e-06, "loss": 0.1438, "step": 468 }, { "epoch": 0.4375, "grad_norm": 0.6588650485620677, "learning_rate": 8.865052266813686e-06, "loss": 0.1482, "step": 469 }, { "epoch": 0.43843283582089554, "grad_norm": 0.7126058976976541, "learning_rate": 8.86040025237734e-06, "loss": 0.1904, "step": 470 }, { "epoch": 0.439365671641791, "grad_norm": 0.6824317477228325, "learning_rate": 8.855739949316957e-06, "loss": 0.1708, "step": 471 }, { "epoch": 0.44029850746268656, "grad_norm": 0.6337466996117702, "learning_rate": 8.851071367638625e-06, "loss": 0.164, "step": 472 }, { "epoch": 0.4412313432835821, "grad_norm": 0.6924621378270575, "learning_rate": 8.846394517366202e-06, "loss": 0.1686, "step": 473 }, { "epoch": 0.44216417910447764, "grad_norm": 0.6559004081848102, "learning_rate": 8.841709408541304e-06, "loss": 0.1667, "step": 474 }, { "epoch": 0.4430970149253731, "grad_norm": 0.704053407105582, "learning_rate": 8.837016051223281e-06, "loss": 0.1875, "step": 475 }, { "epoch": 0.44402985074626866, "grad_norm": 0.6412415713466533, "learning_rate": 8.832314455489188e-06, "loss": 0.1536, "step": 476 }, { "epoch": 0.4449626865671642, "grad_norm": 0.673140175891215, "learning_rate": 8.827604631433771e-06, "loss": 0.1663, "step": 477 }, { "epoch": 0.4458955223880597, "grad_norm": 0.6972144380627927, "learning_rate": 8.822886589169443e-06, "loss": 0.1474, "step": 478 }, { "epoch": 0.4468283582089552, "grad_norm": 0.6602020678647472, "learning_rate": 8.818160338826262e-06, "loss": 0.1332, "step": 479 }, { "epoch": 0.44776119402985076, "grad_norm": 0.651829279394813, "learning_rate": 8.81342589055191e-06, "loss": 0.1469, "step": 480 }, { "epoch": 0.44869402985074625, "grad_norm": 0.6996237770978309, "learning_rate": 8.80868325451167e-06, "loss": 0.1497, "step": 481 }, { "epoch": 0.4496268656716418, "grad_norm": 0.6075203927646219, "learning_rate": 8.803932440888404e-06, "loss": 0.1311, "step": 482 }, { "epoch": 0.4505597014925373, "grad_norm": 0.700278310703, "learning_rate": 8.799173459882534e-06, "loss": 0.1698, "step": 483 }, { "epoch": 0.45149253731343286, "grad_norm": 0.6738949369551479, "learning_rate": 8.794406321712017e-06, "loss": 0.1602, "step": 484 }, { "epoch": 0.45242537313432835, "grad_norm": 0.638078477110568, "learning_rate": 8.789631036612324e-06, "loss": 0.1469, "step": 485 }, { "epoch": 0.4533582089552239, "grad_norm": 0.692495606894183, "learning_rate": 8.784847614836418e-06, "loss": 0.1651, "step": 486 }, { "epoch": 0.4542910447761194, "grad_norm": 0.6324065506780772, "learning_rate": 8.780056066654734e-06, "loss": 0.1338, "step": 487 }, { "epoch": 0.4552238805970149, "grad_norm": 0.638408674473698, "learning_rate": 8.775256402355155e-06, "loss": 0.1376, "step": 488 }, { "epoch": 0.45615671641791045, "grad_norm": 0.7229808635813268, "learning_rate": 8.770448632242984e-06, "loss": 0.1946, "step": 489 }, { "epoch": 0.457089552238806, "grad_norm": 0.7082121586414268, "learning_rate": 8.765632766640937e-06, "loss": 0.153, "step": 490 }, { "epoch": 0.45802238805970147, "grad_norm": 0.7022902510822524, "learning_rate": 8.760808815889105e-06, "loss": 0.1658, "step": 491 }, { "epoch": 0.458955223880597, "grad_norm": 0.6460764679138236, "learning_rate": 8.755976790344945e-06, "loss": 0.1551, "step": 492 }, { "epoch": 0.45988805970149255, "grad_norm": 0.6634319997790233, "learning_rate": 8.751136700383243e-06, "loss": 0.1567, "step": 493 }, { "epoch": 0.4608208955223881, "grad_norm": 0.6498570037970721, "learning_rate": 8.746288556396104e-06, "loss": 0.1676, "step": 494 }, { "epoch": 0.46175373134328357, "grad_norm": 0.7124862098820738, "learning_rate": 8.74143236879293e-06, "loss": 0.1771, "step": 495 }, { "epoch": 0.4626865671641791, "grad_norm": 0.6148997671165092, "learning_rate": 8.736568148000386e-06, "loss": 0.1377, "step": 496 }, { "epoch": 0.46361940298507465, "grad_norm": 0.6616412962473447, "learning_rate": 8.731695904462389e-06, "loss": 0.1771, "step": 497 }, { "epoch": 0.46455223880597013, "grad_norm": 0.6200360868219392, "learning_rate": 8.726815648640084e-06, "loss": 0.1422, "step": 498 }, { "epoch": 0.46548507462686567, "grad_norm": 0.6307154448085411, "learning_rate": 8.721927391011812e-06, "loss": 0.1541, "step": 499 }, { "epoch": 0.4664179104477612, "grad_norm": 0.7155764510503408, "learning_rate": 8.7170311420731e-06, "loss": 0.1947, "step": 500 }, { "epoch": 0.4664179104477612, "eval_loss": 0.17030416429042816, "eval_runtime": 4.1961, "eval_samples_per_second": 20.733, "eval_steps_per_second": 5.243, "step": 500 }, { "epoch": 0.4673507462686567, "grad_norm": 0.6559256663319056, "learning_rate": 8.712126912336631e-06, "loss": 0.1509, "step": 501 }, { "epoch": 0.46828358208955223, "grad_norm": 0.6631400312512453, "learning_rate": 8.707214712332227e-06, "loss": 0.1425, "step": 502 }, { "epoch": 0.46921641791044777, "grad_norm": 0.6921448101447667, "learning_rate": 8.702294552606815e-06, "loss": 0.1874, "step": 503 }, { "epoch": 0.4701492537313433, "grad_norm": 0.5845449928528333, "learning_rate": 8.697366443724424e-06, "loss": 0.1337, "step": 504 }, { "epoch": 0.4710820895522388, "grad_norm": 0.5935115801414959, "learning_rate": 8.692430396266138e-06, "loss": 0.1408, "step": 505 }, { "epoch": 0.47201492537313433, "grad_norm": 0.6803576439635525, "learning_rate": 8.687486420830093e-06, "loss": 0.1947, "step": 506 }, { "epoch": 0.47294776119402987, "grad_norm": 0.6317342917846218, "learning_rate": 8.682534528031447e-06, "loss": 0.1559, "step": 507 }, { "epoch": 0.47388059701492535, "grad_norm": 0.6514938093858352, "learning_rate": 8.677574728502355e-06, "loss": 0.1671, "step": 508 }, { "epoch": 0.4748134328358209, "grad_norm": 0.6921627296736316, "learning_rate": 8.67260703289195e-06, "loss": 0.178, "step": 509 }, { "epoch": 0.47574626865671643, "grad_norm": 0.6279053163130243, "learning_rate": 8.667631451866317e-06, "loss": 0.1392, "step": 510 }, { "epoch": 0.4766791044776119, "grad_norm": 0.6563975059374271, "learning_rate": 8.662647996108475e-06, "loss": 0.1527, "step": 511 }, { "epoch": 0.47761194029850745, "grad_norm": 0.6378676770524476, "learning_rate": 8.657656676318346e-06, "loss": 0.1426, "step": 512 }, { "epoch": 0.478544776119403, "grad_norm": 0.7113183223691943, "learning_rate": 8.65265750321274e-06, "loss": 0.1978, "step": 513 }, { "epoch": 0.47947761194029853, "grad_norm": 0.7005134146694462, "learning_rate": 8.64765048752533e-06, "loss": 0.1845, "step": 514 }, { "epoch": 0.480410447761194, "grad_norm": 0.6606154625302434, "learning_rate": 8.642635640006623e-06, "loss": 0.1579, "step": 515 }, { "epoch": 0.48134328358208955, "grad_norm": 0.587938322899775, "learning_rate": 8.637612971423945e-06, "loss": 0.1267, "step": 516 }, { "epoch": 0.4822761194029851, "grad_norm": 0.6508710998847475, "learning_rate": 8.632582492561414e-06, "loss": 0.1591, "step": 517 }, { "epoch": 0.4832089552238806, "grad_norm": 0.6497955926223947, "learning_rate": 8.627544214219918e-06, "loss": 0.1665, "step": 518 }, { "epoch": 0.4841417910447761, "grad_norm": 0.7009910435678934, "learning_rate": 8.622498147217091e-06, "loss": 0.1805, "step": 519 }, { "epoch": 0.48507462686567165, "grad_norm": 0.6434089294361077, "learning_rate": 8.617444302387288e-06, "loss": 0.1435, "step": 520 }, { "epoch": 0.48600746268656714, "grad_norm": 0.6596379312554564, "learning_rate": 8.612382690581567e-06, "loss": 0.1639, "step": 521 }, { "epoch": 0.4869402985074627, "grad_norm": 0.6856054951222558, "learning_rate": 8.607313322667657e-06, "loss": 0.173, "step": 522 }, { "epoch": 0.4878731343283582, "grad_norm": 0.6449637789750471, "learning_rate": 8.602236209529948e-06, "loss": 0.167, "step": 523 }, { "epoch": 0.48880597014925375, "grad_norm": 0.6404925239647499, "learning_rate": 8.597151362069452e-06, "loss": 0.163, "step": 524 }, { "epoch": 0.48973880597014924, "grad_norm": 0.6629899900904267, "learning_rate": 8.59205879120379e-06, "loss": 0.1485, "step": 525 }, { "epoch": 0.4906716417910448, "grad_norm": 0.656126487436752, "learning_rate": 8.58695850786717e-06, "loss": 0.1584, "step": 526 }, { "epoch": 0.4916044776119403, "grad_norm": 0.6824604320257147, "learning_rate": 8.581850523010353e-06, "loss": 0.1847, "step": 527 }, { "epoch": 0.4925373134328358, "grad_norm": 0.6213569215361795, "learning_rate": 8.576734847600639e-06, "loss": 0.1478, "step": 528 }, { "epoch": 0.49347014925373134, "grad_norm": 0.624598212086132, "learning_rate": 8.571611492621839e-06, "loss": 0.1414, "step": 529 }, { "epoch": 0.4944029850746269, "grad_norm": 0.6224857180968149, "learning_rate": 8.566480469074256e-06, "loss": 0.1549, "step": 530 }, { "epoch": 0.49533582089552236, "grad_norm": 0.7166285611956928, "learning_rate": 8.561341787974653e-06, "loss": 0.2066, "step": 531 }, { "epoch": 0.4962686567164179, "grad_norm": 0.6786380504256554, "learning_rate": 8.55619546035624e-06, "loss": 0.1744, "step": 532 }, { "epoch": 0.49720149253731344, "grad_norm": 0.6419337152468432, "learning_rate": 8.55104149726864e-06, "loss": 0.1685, "step": 533 }, { "epoch": 0.498134328358209, "grad_norm": 0.6165270063740435, "learning_rate": 8.545879909777872e-06, "loss": 0.1452, "step": 534 }, { "epoch": 0.49906716417910446, "grad_norm": 0.6368942512963577, "learning_rate": 8.540710708966326e-06, "loss": 0.1589, "step": 535 }, { "epoch": 0.5, "grad_norm": 0.6404100943933043, "learning_rate": 8.535533905932739e-06, "loss": 0.1461, "step": 536 }, { "epoch": 0.5009328358208955, "grad_norm": 0.6422862504282377, "learning_rate": 8.530349511792165e-06, "loss": 0.1541, "step": 537 }, { "epoch": 0.5018656716417911, "grad_norm": 0.6532925997800934, "learning_rate": 8.525157537675966e-06, "loss": 0.1676, "step": 538 }, { "epoch": 0.5027985074626866, "grad_norm": 0.6542308724763228, "learning_rate": 8.519957994731768e-06, "loss": 0.1523, "step": 539 }, { "epoch": 0.503731343283582, "grad_norm": 0.6193815566580989, "learning_rate": 8.514750894123463e-06, "loss": 0.1357, "step": 540 }, { "epoch": 0.5046641791044776, "grad_norm": 0.6189116678628227, "learning_rate": 8.509536247031152e-06, "loss": 0.1496, "step": 541 }, { "epoch": 0.5055970149253731, "grad_norm": 0.6282013640631606, "learning_rate": 8.504314064651154e-06, "loss": 0.147, "step": 542 }, { "epoch": 0.5065298507462687, "grad_norm": 0.6599728087556078, "learning_rate": 8.499084358195957e-06, "loss": 0.1742, "step": 543 }, { "epoch": 0.5074626865671642, "grad_norm": 0.6363755372508304, "learning_rate": 8.49384713889421e-06, "loss": 0.1689, "step": 544 }, { "epoch": 0.5083955223880597, "grad_norm": 0.6148383155064318, "learning_rate": 8.488602417990687e-06, "loss": 0.1384, "step": 545 }, { "epoch": 0.5093283582089553, "grad_norm": 0.7705288072991335, "learning_rate": 8.483350206746277e-06, "loss": 0.1644, "step": 546 }, { "epoch": 0.5102611940298507, "grad_norm": 0.6530450410840359, "learning_rate": 8.478090516437947e-06, "loss": 0.1651, "step": 547 }, { "epoch": 0.5111940298507462, "grad_norm": 0.6463829978348403, "learning_rate": 8.472823358358716e-06, "loss": 0.1558, "step": 548 }, { "epoch": 0.5121268656716418, "grad_norm": 0.6090191110531002, "learning_rate": 8.467548743817645e-06, "loss": 0.1535, "step": 549 }, { "epoch": 0.5130597014925373, "grad_norm": 0.6087687547873405, "learning_rate": 8.462266684139805e-06, "loss": 0.1379, "step": 550 }, { "epoch": 0.5139925373134329, "grad_norm": 0.6365316905918922, "learning_rate": 8.456977190666247e-06, "loss": 0.1499, "step": 551 }, { "epoch": 0.5149253731343284, "grad_norm": 0.6477952381124517, "learning_rate": 8.451680274753986e-06, "loss": 0.146, "step": 552 }, { "epoch": 0.5158582089552238, "grad_norm": 0.6325758835019507, "learning_rate": 8.446375947775976e-06, "loss": 0.15, "step": 553 }, { "epoch": 0.5167910447761194, "grad_norm": 0.5901748843103091, "learning_rate": 8.441064221121078e-06, "loss": 0.1249, "step": 554 }, { "epoch": 0.5177238805970149, "grad_norm": 0.6375847186347278, "learning_rate": 8.435745106194043e-06, "loss": 0.1534, "step": 555 }, { "epoch": 0.5186567164179104, "grad_norm": 0.6398354182527118, "learning_rate": 8.430418614415488e-06, "loss": 0.1712, "step": 556 }, { "epoch": 0.519589552238806, "grad_norm": 0.6475712211421351, "learning_rate": 8.425084757221864e-06, "loss": 0.1508, "step": 557 }, { "epoch": 0.5205223880597015, "grad_norm": 0.6530014376535488, "learning_rate": 8.419743546065442e-06, "loss": 0.1565, "step": 558 }, { "epoch": 0.5214552238805971, "grad_norm": 0.6980530071004158, "learning_rate": 8.414394992414276e-06, "loss": 0.1796, "step": 559 }, { "epoch": 0.5223880597014925, "grad_norm": 0.6293202042648803, "learning_rate": 8.40903910775219e-06, "loss": 0.136, "step": 560 }, { "epoch": 0.523320895522388, "grad_norm": 0.6646023363634793, "learning_rate": 8.403675903578745e-06, "loss": 0.1621, "step": 561 }, { "epoch": 0.5242537313432836, "grad_norm": 0.6893481606674301, "learning_rate": 8.398305391409221e-06, "loss": 0.1756, "step": 562 }, { "epoch": 0.5251865671641791, "grad_norm": 0.7165562716306684, "learning_rate": 8.392927582774586e-06, "loss": 0.1954, "step": 563 }, { "epoch": 0.5261194029850746, "grad_norm": 0.6842376292158515, "learning_rate": 8.387542489221477e-06, "loss": 0.1739, "step": 564 }, { "epoch": 0.5270522388059702, "grad_norm": 0.6898954935188116, "learning_rate": 8.38215012231217e-06, "loss": 0.1472, "step": 565 }, { "epoch": 0.5279850746268657, "grad_norm": 0.6338820405777343, "learning_rate": 8.376750493624556e-06, "loss": 0.1504, "step": 566 }, { "epoch": 0.5289179104477612, "grad_norm": 0.6528446403000787, "learning_rate": 8.371343614752124e-06, "loss": 0.1414, "step": 567 }, { "epoch": 0.5298507462686567, "grad_norm": 0.6765667392637296, "learning_rate": 8.36592949730392e-06, "loss": 0.1447, "step": 568 }, { "epoch": 0.5307835820895522, "grad_norm": 0.6535995284985252, "learning_rate": 8.360508152904544e-06, "loss": 0.1675, "step": 569 }, { "epoch": 0.5317164179104478, "grad_norm": 0.6675290570850319, "learning_rate": 8.355079593194102e-06, "loss": 0.1413, "step": 570 }, { "epoch": 0.5326492537313433, "grad_norm": 0.694809623360305, "learning_rate": 8.349643829828198e-06, "loss": 0.1663, "step": 571 }, { "epoch": 0.5335820895522388, "grad_norm": 0.6941609047498378, "learning_rate": 8.344200874477901e-06, "loss": 0.1679, "step": 572 }, { "epoch": 0.5345149253731343, "grad_norm": 0.660756343073272, "learning_rate": 8.338750738829723e-06, "loss": 0.1757, "step": 573 }, { "epoch": 0.5354477611940298, "grad_norm": 0.6181906635514727, "learning_rate": 8.33329343458559e-06, "loss": 0.1536, "step": 574 }, { "epoch": 0.5363805970149254, "grad_norm": 0.6824926037511092, "learning_rate": 8.327828973462823e-06, "loss": 0.19, "step": 575 }, { "epoch": 0.5373134328358209, "grad_norm": 0.6329288449423699, "learning_rate": 8.32235736719411e-06, "loss": 0.1476, "step": 576 }, { "epoch": 0.5382462686567164, "grad_norm": 0.6200440728183669, "learning_rate": 8.316878627527474e-06, "loss": 0.151, "step": 577 }, { "epoch": 0.539179104477612, "grad_norm": 0.6622252962270359, "learning_rate": 8.311392766226261e-06, "loss": 0.1656, "step": 578 }, { "epoch": 0.5401119402985075, "grad_norm": 0.6050649206007422, "learning_rate": 8.305899795069102e-06, "loss": 0.152, "step": 579 }, { "epoch": 0.5410447761194029, "grad_norm": 0.6268084301569957, "learning_rate": 8.300399725849902e-06, "loss": 0.1512, "step": 580 }, { "epoch": 0.5419776119402985, "grad_norm": 0.6520742169363384, "learning_rate": 8.294892570377794e-06, "loss": 0.1738, "step": 581 }, { "epoch": 0.542910447761194, "grad_norm": 0.5970500293955573, "learning_rate": 8.289378340477138e-06, "loss": 0.1281, "step": 582 }, { "epoch": 0.5438432835820896, "grad_norm": 0.6332361240377113, "learning_rate": 8.283857047987475e-06, "loss": 0.1416, "step": 583 }, { "epoch": 0.5447761194029851, "grad_norm": 0.6415153415207774, "learning_rate": 8.278328704763516e-06, "loss": 0.1404, "step": 584 }, { "epoch": 0.5457089552238806, "grad_norm": 0.5974948449455182, "learning_rate": 8.272793322675103e-06, "loss": 0.1456, "step": 585 }, { "epoch": 0.5466417910447762, "grad_norm": 0.6129926272963674, "learning_rate": 8.2672509136072e-06, "loss": 0.1433, "step": 586 }, { "epoch": 0.5475746268656716, "grad_norm": 0.6352972160740916, "learning_rate": 8.261701489459852e-06, "loss": 0.145, "step": 587 }, { "epoch": 0.5485074626865671, "grad_norm": 0.6977622913110467, "learning_rate": 8.256145062148168e-06, "loss": 0.166, "step": 588 }, { "epoch": 0.5494402985074627, "grad_norm": 0.7066698503168559, "learning_rate": 8.250581643602293e-06, "loss": 0.1966, "step": 589 }, { "epoch": 0.5503731343283582, "grad_norm": 0.6623090590035278, "learning_rate": 8.245011245767385e-06, "loss": 0.1561, "step": 590 }, { "epoch": 0.5513059701492538, "grad_norm": 0.718476327621939, "learning_rate": 8.239433880603585e-06, "loss": 0.2019, "step": 591 }, { "epoch": 0.5522388059701493, "grad_norm": 0.6523397652197293, "learning_rate": 8.233849560085994e-06, "loss": 0.1696, "step": 592 }, { "epoch": 0.5531716417910447, "grad_norm": 0.6415940935025244, "learning_rate": 8.228258296204647e-06, "loss": 0.1597, "step": 593 }, { "epoch": 0.5541044776119403, "grad_norm": 0.6887079011019287, "learning_rate": 8.222660100964487e-06, "loss": 0.164, "step": 594 }, { "epoch": 0.5550373134328358, "grad_norm": 0.7153183750875622, "learning_rate": 8.217054986385336e-06, "loss": 0.1875, "step": 595 }, { "epoch": 0.5559701492537313, "grad_norm": 0.6465565361029906, "learning_rate": 8.211442964501879e-06, "loss": 0.1495, "step": 596 }, { "epoch": 0.5569029850746269, "grad_norm": 0.6220200929313112, "learning_rate": 8.205824047363627e-06, "loss": 0.1375, "step": 597 }, { "epoch": 0.5578358208955224, "grad_norm": 0.6663068774544811, "learning_rate": 8.200198247034897e-06, "loss": 0.1641, "step": 598 }, { "epoch": 0.558768656716418, "grad_norm": 0.6488194657729722, "learning_rate": 8.194565575594784e-06, "loss": 0.1531, "step": 599 }, { "epoch": 0.5597014925373134, "grad_norm": 0.6602191397217592, "learning_rate": 8.188926045137139e-06, "loss": 0.1681, "step": 600 }, { "epoch": 0.5606343283582089, "grad_norm": 0.6306096265066752, "learning_rate": 8.183279667770534e-06, "loss": 0.1359, "step": 601 }, { "epoch": 0.5615671641791045, "grad_norm": 0.6848393882553127, "learning_rate": 8.177626455618245e-06, "loss": 0.1759, "step": 602 }, { "epoch": 0.5625, "grad_norm": 0.6982848979938182, "learning_rate": 8.171966420818227e-06, "loss": 0.2003, "step": 603 }, { "epoch": 0.5634328358208955, "grad_norm": 0.6602856504051235, "learning_rate": 8.166299575523081e-06, "loss": 0.1538, "step": 604 }, { "epoch": 0.5643656716417911, "grad_norm": 0.6566995205602327, "learning_rate": 8.160625931900022e-06, "loss": 0.1621, "step": 605 }, { "epoch": 0.5652985074626866, "grad_norm": 0.7425865751583706, "learning_rate": 8.154945502130877e-06, "loss": 0.1621, "step": 606 }, { "epoch": 0.566231343283582, "grad_norm": 0.6073134332284943, "learning_rate": 8.149258298412033e-06, "loss": 0.1436, "step": 607 }, { "epoch": 0.5671641791044776, "grad_norm": 0.6747901463397569, "learning_rate": 8.143564332954426e-06, "loss": 0.1828, "step": 608 }, { "epoch": 0.5680970149253731, "grad_norm": 0.6288397138092399, "learning_rate": 8.137863617983506e-06, "loss": 0.1414, "step": 609 }, { "epoch": 0.5690298507462687, "grad_norm": 0.6736879515208554, "learning_rate": 8.132156165739216e-06, "loss": 0.1848, "step": 610 }, { "epoch": 0.5699626865671642, "grad_norm": 0.6336869162892023, "learning_rate": 8.12644198847597e-06, "loss": 0.127, "step": 611 }, { "epoch": 0.5708955223880597, "grad_norm": 0.5825195344233711, "learning_rate": 8.120721098462612e-06, "loss": 0.1307, "step": 612 }, { "epoch": 0.5718283582089553, "grad_norm": 0.6531507333277544, "learning_rate": 8.114993507982408e-06, "loss": 0.1406, "step": 613 }, { "epoch": 0.5727611940298507, "grad_norm": 0.6874230886247001, "learning_rate": 8.109259229333005e-06, "loss": 0.1745, "step": 614 }, { "epoch": 0.5736940298507462, "grad_norm": 0.6391075089456794, "learning_rate": 8.103518274826408e-06, "loss": 0.1435, "step": 615 }, { "epoch": 0.5746268656716418, "grad_norm": 0.615333968232552, "learning_rate": 8.097770656788961e-06, "loss": 0.1458, "step": 616 }, { "epoch": 0.5755597014925373, "grad_norm": 0.6817815660116607, "learning_rate": 8.092016387561316e-06, "loss": 0.1628, "step": 617 }, { "epoch": 0.5764925373134329, "grad_norm": 0.7011186911898641, "learning_rate": 8.086255479498398e-06, "loss": 0.1951, "step": 618 }, { "epoch": 0.5774253731343284, "grad_norm": 0.6694445034257456, "learning_rate": 8.080487944969395e-06, "loss": 0.1777, "step": 619 }, { "epoch": 0.5783582089552238, "grad_norm": 0.6756863041793346, "learning_rate": 8.074713796357717e-06, "loss": 0.165, "step": 620 }, { "epoch": 0.5792910447761194, "grad_norm": 0.6271466841297935, "learning_rate": 8.068933046060976e-06, "loss": 0.1652, "step": 621 }, { "epoch": 0.5802238805970149, "grad_norm": 0.7246578149444074, "learning_rate": 8.063145706490961e-06, "loss": 0.2006, "step": 622 }, { "epoch": 0.5811567164179104, "grad_norm": 0.6794780282065498, "learning_rate": 8.057351790073601e-06, "loss": 0.1469, "step": 623 }, { "epoch": 0.582089552238806, "grad_norm": 0.6502078226265429, "learning_rate": 8.051551309248961e-06, "loss": 0.1664, "step": 624 }, { "epoch": 0.5830223880597015, "grad_norm": 0.6800083170207186, "learning_rate": 8.045744276471185e-06, "loss": 0.1792, "step": 625 }, { "epoch": 0.5839552238805971, "grad_norm": 0.7267551767665674, "learning_rate": 8.039930704208492e-06, "loss": 0.2, "step": 626 }, { "epoch": 0.5848880597014925, "grad_norm": 0.6351886529473169, "learning_rate": 8.034110604943144e-06, "loss": 0.1512, "step": 627 }, { "epoch": 0.585820895522388, "grad_norm": 0.7075141902176935, "learning_rate": 8.028283991171408e-06, "loss": 0.1587, "step": 628 }, { "epoch": 0.5867537313432836, "grad_norm": 0.6481183047064885, "learning_rate": 8.02245087540355e-06, "loss": 0.1371, "step": 629 }, { "epoch": 0.5876865671641791, "grad_norm": 0.6589825628789143, "learning_rate": 8.016611270163783e-06, "loss": 0.1543, "step": 630 }, { "epoch": 0.5886194029850746, "grad_norm": 0.6578970796978433, "learning_rate": 8.010765187990268e-06, "loss": 0.1561, "step": 631 }, { "epoch": 0.5895522388059702, "grad_norm": 0.768771721033516, "learning_rate": 8.004912641435064e-06, "loss": 0.2153, "step": 632 }, { "epoch": 0.5904850746268657, "grad_norm": 0.6218645259943533, "learning_rate": 7.999053643064108e-06, "loss": 0.1382, "step": 633 }, { "epoch": 0.5914179104477612, "grad_norm": 0.7393097592125685, "learning_rate": 7.993188205457195e-06, "loss": 0.1781, "step": 634 }, { "epoch": 0.5923507462686567, "grad_norm": 0.6790075708637042, "learning_rate": 7.987316341207942e-06, "loss": 0.1478, "step": 635 }, { "epoch": 0.5932835820895522, "grad_norm": 0.652493715295722, "learning_rate": 7.981438062923767e-06, "loss": 0.1482, "step": 636 }, { "epoch": 0.5942164179104478, "grad_norm": 0.7047880880678135, "learning_rate": 7.975553383225857e-06, "loss": 0.2048, "step": 637 }, { "epoch": 0.5951492537313433, "grad_norm": 0.6676767212061754, "learning_rate": 7.969662314749148e-06, "loss": 0.1881, "step": 638 }, { "epoch": 0.5960820895522388, "grad_norm": 0.7009073559530836, "learning_rate": 7.963764870142286e-06, "loss": 0.1797, "step": 639 }, { "epoch": 0.5970149253731343, "grad_norm": 0.6733155692109827, "learning_rate": 7.957861062067614e-06, "loss": 0.1876, "step": 640 }, { "epoch": 0.5979477611940298, "grad_norm": 0.6375001684264536, "learning_rate": 7.951950903201133e-06, "loss": 0.1364, "step": 641 }, { "epoch": 0.5988805970149254, "grad_norm": 0.6838725891470462, "learning_rate": 7.946034406232481e-06, "loss": 0.1571, "step": 642 }, { "epoch": 0.5998134328358209, "grad_norm": 0.5873545150382838, "learning_rate": 7.940111583864909e-06, "loss": 0.1327, "step": 643 }, { "epoch": 0.6007462686567164, "grad_norm": 0.6223782632374885, "learning_rate": 7.934182448815244e-06, "loss": 0.1451, "step": 644 }, { "epoch": 0.601679104477612, "grad_norm": 0.644288600521529, "learning_rate": 7.928247013813867e-06, "loss": 0.1521, "step": 645 }, { "epoch": 0.6026119402985075, "grad_norm": 0.6954823000886735, "learning_rate": 7.922305291604688e-06, "loss": 0.1977, "step": 646 }, { "epoch": 0.6035447761194029, "grad_norm": 0.6939048217455536, "learning_rate": 7.916357294945116e-06, "loss": 0.1607, "step": 647 }, { "epoch": 0.6044776119402985, "grad_norm": 0.6284412307415721, "learning_rate": 7.910403036606028e-06, "loss": 0.1364, "step": 648 }, { "epoch": 0.605410447761194, "grad_norm": 0.6113239802774686, "learning_rate": 7.90444252937175e-06, "loss": 0.1483, "step": 649 }, { "epoch": 0.6063432835820896, "grad_norm": 0.6569491138363276, "learning_rate": 7.898475786040025e-06, "loss": 0.1546, "step": 650 }, { "epoch": 0.6072761194029851, "grad_norm": 0.6826840661797282, "learning_rate": 7.892502819421979e-06, "loss": 0.1791, "step": 651 }, { "epoch": 0.6082089552238806, "grad_norm": 0.6543834694823215, "learning_rate": 7.88652364234211e-06, "loss": 0.1586, "step": 652 }, { "epoch": 0.6091417910447762, "grad_norm": 0.6411876603611992, "learning_rate": 7.880538267638243e-06, "loss": 0.1642, "step": 653 }, { "epoch": 0.6100746268656716, "grad_norm": 0.6113679739042378, "learning_rate": 7.874546708161512e-06, "loss": 0.1391, "step": 654 }, { "epoch": 0.6110074626865671, "grad_norm": 0.634491884213112, "learning_rate": 7.868548976776328e-06, "loss": 0.1769, "step": 655 }, { "epoch": 0.6119402985074627, "grad_norm": 0.6390570621037459, "learning_rate": 7.86254508636036e-06, "loss": 0.1844, "step": 656 }, { "epoch": 0.6128731343283582, "grad_norm": 0.6071631108027539, "learning_rate": 7.856535049804495e-06, "loss": 0.1252, "step": 657 }, { "epoch": 0.6138059701492538, "grad_norm": 0.6474165376422891, "learning_rate": 7.850518880012815e-06, "loss": 0.1651, "step": 658 }, { "epoch": 0.6147388059701493, "grad_norm": 0.6272433834312396, "learning_rate": 7.844496589902577e-06, "loss": 0.1501, "step": 659 }, { "epoch": 0.6156716417910447, "grad_norm": 0.6777049813651082, "learning_rate": 7.838468192404176e-06, "loss": 0.1778, "step": 660 }, { "epoch": 0.6166044776119403, "grad_norm": 0.6561522731105978, "learning_rate": 7.83243370046112e-06, "loss": 0.1856, "step": 661 }, { "epoch": 0.6175373134328358, "grad_norm": 0.6090505791700408, "learning_rate": 7.826393127029998e-06, "loss": 0.1268, "step": 662 }, { "epoch": 0.6184701492537313, "grad_norm": 0.65240223246289, "learning_rate": 7.820346485080466e-06, "loss": 0.1566, "step": 663 }, { "epoch": 0.6194029850746269, "grad_norm": 0.7024329563698646, "learning_rate": 7.814293787595197e-06, "loss": 0.1659, "step": 664 }, { "epoch": 0.6203358208955224, "grad_norm": 0.6981388664050941, "learning_rate": 7.80823504756988e-06, "loss": 0.1598, "step": 665 }, { "epoch": 0.621268656716418, "grad_norm": 0.583497402108226, "learning_rate": 7.80217027801317e-06, "loss": 0.1302, "step": 666 }, { "epoch": 0.6222014925373134, "grad_norm": 0.6271321723762319, "learning_rate": 7.796099491946665e-06, "loss": 0.1318, "step": 667 }, { "epoch": 0.6231343283582089, "grad_norm": 0.6380553552634389, "learning_rate": 7.790022702404887e-06, "loss": 0.144, "step": 668 }, { "epoch": 0.6240671641791045, "grad_norm": 0.6920513987833131, "learning_rate": 7.783939922435244e-06, "loss": 0.1866, "step": 669 }, { "epoch": 0.625, "grad_norm": 0.6761833099863712, "learning_rate": 7.777851165098012e-06, "loss": 0.169, "step": 670 }, { "epoch": 0.6259328358208955, "grad_norm": 0.6698331039947517, "learning_rate": 7.771756443466292e-06, "loss": 0.1746, "step": 671 }, { "epoch": 0.6268656716417911, "grad_norm": 0.6799148941999966, "learning_rate": 7.765655770625997e-06, "loss": 0.1713, "step": 672 }, { "epoch": 0.6277985074626866, "grad_norm": 0.615204052802436, "learning_rate": 7.759549159675819e-06, "loss": 0.1317, "step": 673 }, { "epoch": 0.628731343283582, "grad_norm": 0.6181628299331868, "learning_rate": 7.753436623727193e-06, "loss": 0.1279, "step": 674 }, { "epoch": 0.6296641791044776, "grad_norm": 0.6467900454994387, "learning_rate": 7.747318175904281e-06, "loss": 0.1555, "step": 675 }, { "epoch": 0.6305970149253731, "grad_norm": 0.6499038275796759, "learning_rate": 7.741193829343937e-06, "loss": 0.1512, "step": 676 }, { "epoch": 0.6315298507462687, "grad_norm": 0.6936269349726135, "learning_rate": 7.73506359719568e-06, "loss": 0.171, "step": 677 }, { "epoch": 0.6324626865671642, "grad_norm": 0.6068947529231038, "learning_rate": 7.728927492621665e-06, "loss": 0.1368, "step": 678 }, { "epoch": 0.6333955223880597, "grad_norm": 0.6780200707920547, "learning_rate": 7.722785528796657e-06, "loss": 0.1594, "step": 679 }, { "epoch": 0.6343283582089553, "grad_norm": 0.6562378211147659, "learning_rate": 7.716637718908002e-06, "loss": 0.1512, "step": 680 }, { "epoch": 0.6352611940298507, "grad_norm": 0.6218042213488317, "learning_rate": 7.710484076155595e-06, "loss": 0.1341, "step": 681 }, { "epoch": 0.6361940298507462, "grad_norm": 0.6623685938609211, "learning_rate": 7.704324613751856e-06, "loss": 0.1528, "step": 682 }, { "epoch": 0.6371268656716418, "grad_norm": 0.6213299419722949, "learning_rate": 7.698159344921704e-06, "loss": 0.1391, "step": 683 }, { "epoch": 0.6380597014925373, "grad_norm": 0.6799504513106223, "learning_rate": 7.691988282902519e-06, "loss": 0.1969, "step": 684 }, { "epoch": 0.6389925373134329, "grad_norm": 0.682040554981312, "learning_rate": 7.685811440944121e-06, "loss": 0.1585, "step": 685 }, { "epoch": 0.6399253731343284, "grad_norm": 0.8064568817649423, "learning_rate": 7.679628832308743e-06, "loss": 0.1972, "step": 686 }, { "epoch": 0.6408582089552238, "grad_norm": 0.586026460062497, "learning_rate": 7.673440470270998e-06, "loss": 0.1256, "step": 687 }, { "epoch": 0.6417910447761194, "grad_norm": 0.6719755469315377, "learning_rate": 7.667246368117852e-06, "loss": 0.1892, "step": 688 }, { "epoch": 0.6427238805970149, "grad_norm": 0.6071848610771641, "learning_rate": 7.661046539148596e-06, "loss": 0.1473, "step": 689 }, { "epoch": 0.6436567164179104, "grad_norm": 0.6300009224343381, "learning_rate": 7.654840996674813e-06, "loss": 0.1647, "step": 690 }, { "epoch": 0.644589552238806, "grad_norm": 0.6338677478909487, "learning_rate": 7.648629754020359e-06, "loss": 0.1375, "step": 691 }, { "epoch": 0.6455223880597015, "grad_norm": 0.6152032484286838, "learning_rate": 7.642412824521328e-06, "loss": 0.1394, "step": 692 }, { "epoch": 0.6464552238805971, "grad_norm": 0.7349025854782087, "learning_rate": 7.636190221526022e-06, "loss": 0.21, "step": 693 }, { "epoch": 0.6473880597014925, "grad_norm": 0.6884034469579194, "learning_rate": 7.629961958394923e-06, "loss": 0.1753, "step": 694 }, { "epoch": 0.648320895522388, "grad_norm": 0.6815485352951477, "learning_rate": 7.623728048500669e-06, "loss": 0.1795, "step": 695 }, { "epoch": 0.6492537313432836, "grad_norm": 0.699644633797632, "learning_rate": 7.617488505228023e-06, "loss": 0.1768, "step": 696 }, { "epoch": 0.6501865671641791, "grad_norm": 0.6373295211607588, "learning_rate": 7.611243341973839e-06, "loss": 0.1531, "step": 697 }, { "epoch": 0.6511194029850746, "grad_norm": 0.5556263183679617, "learning_rate": 7.6049925721470455e-06, "loss": 0.1248, "step": 698 }, { "epoch": 0.6520522388059702, "grad_norm": 0.6522170278741339, "learning_rate": 7.598736209168595e-06, "loss": 0.1686, "step": 699 }, { "epoch": 0.6529850746268657, "grad_norm": 0.648757916437683, "learning_rate": 7.592474266471464e-06, "loss": 0.1651, "step": 700 }, { "epoch": 0.6539179104477612, "grad_norm": 0.619135309120307, "learning_rate": 7.5862067575006e-06, "loss": 0.1224, "step": 701 }, { "epoch": 0.6548507462686567, "grad_norm": 0.6642134753375971, "learning_rate": 7.579933695712905e-06, "loss": 0.1746, "step": 702 }, { "epoch": 0.6557835820895522, "grad_norm": 0.6778236742379958, "learning_rate": 7.573655094577204e-06, "loss": 0.1911, "step": 703 }, { "epoch": 0.6567164179104478, "grad_norm": 0.6080111801884874, "learning_rate": 7.56737096757421e-06, "loss": 0.1335, "step": 704 }, { "epoch": 0.6576492537313433, "grad_norm": 0.6169693167618339, "learning_rate": 7.56108132819651e-06, "loss": 0.1456, "step": 705 }, { "epoch": 0.6585820895522388, "grad_norm": 0.681662368323834, "learning_rate": 7.5547861899485175e-06, "loss": 0.1738, "step": 706 }, { "epoch": 0.6595149253731343, "grad_norm": 0.6532161834644604, "learning_rate": 7.5484855663464595e-06, "loss": 0.1765, "step": 707 }, { "epoch": 0.6604477611940298, "grad_norm": 0.5961084637412081, "learning_rate": 7.542179470918336e-06, "loss": 0.1431, "step": 708 }, { "epoch": 0.6613805970149254, "grad_norm": 0.6378114407693336, "learning_rate": 7.535867917203897e-06, "loss": 0.1497, "step": 709 }, { "epoch": 0.6623134328358209, "grad_norm": 0.7088522093387044, "learning_rate": 7.529550918754609e-06, "loss": 0.194, "step": 710 }, { "epoch": 0.6632462686567164, "grad_norm": 0.652083306837165, "learning_rate": 7.523228489133639e-06, "loss": 0.1741, "step": 711 }, { "epoch": 0.664179104477612, "grad_norm": 0.6776314041635515, "learning_rate": 7.5169006419157985e-06, "loss": 0.1839, "step": 712 }, { "epoch": 0.6651119402985075, "grad_norm": 0.6724452095790415, "learning_rate": 7.510567390687549e-06, "loss": 0.1801, "step": 713 }, { "epoch": 0.6660447761194029, "grad_norm": 0.6835246451460584, "learning_rate": 7.504228749046941e-06, "loss": 0.1841, "step": 714 }, { "epoch": 0.6669776119402985, "grad_norm": 0.6128690148868979, "learning_rate": 7.497884730603608e-06, "loss": 0.135, "step": 715 }, { "epoch": 0.667910447761194, "grad_norm": 0.609473235625601, "learning_rate": 7.491535348978719e-06, "loss": 0.1472, "step": 716 }, { "epoch": 0.6688432835820896, "grad_norm": 0.6483236111524605, "learning_rate": 7.485180617804968e-06, "loss": 0.1522, "step": 717 }, { "epoch": 0.6697761194029851, "grad_norm": 0.6677914824468666, "learning_rate": 7.478820550726528e-06, "loss": 0.1761, "step": 718 }, { "epoch": 0.6707089552238806, "grad_norm": 0.6908308239774391, "learning_rate": 7.472455161399031e-06, "loss": 0.1828, "step": 719 }, { "epoch": 0.6716417910447762, "grad_norm": 0.6800582378373765, "learning_rate": 7.466084463489537e-06, "loss": 0.1647, "step": 720 }, { "epoch": 0.6725746268656716, "grad_norm": 0.7169312289988835, "learning_rate": 7.459708470676504e-06, "loss": 0.1621, "step": 721 }, { "epoch": 0.6735074626865671, "grad_norm": 0.6480866624576451, "learning_rate": 7.453327196649756e-06, "loss": 0.1494, "step": 722 }, { "epoch": 0.6744402985074627, "grad_norm": 0.633242688006931, "learning_rate": 7.446940655110457e-06, "loss": 0.1362, "step": 723 }, { "epoch": 0.6753731343283582, "grad_norm": 0.6664444073821739, "learning_rate": 7.440548859771086e-06, "loss": 0.1565, "step": 724 }, { "epoch": 0.6763059701492538, "grad_norm": 0.6897824169269828, "learning_rate": 7.434151824355396e-06, "loss": 0.1623, "step": 725 }, { "epoch": 0.6772388059701493, "grad_norm": 0.6538422408399237, "learning_rate": 7.4277495625983916e-06, "loss": 0.1665, "step": 726 }, { "epoch": 0.6781716417910447, "grad_norm": 0.6945783713023107, "learning_rate": 7.421342088246304e-06, "loss": 0.1771, "step": 727 }, { "epoch": 0.6791044776119403, "grad_norm": 0.6228839740858083, "learning_rate": 7.414929415056551e-06, "loss": 0.1489, "step": 728 }, { "epoch": 0.6800373134328358, "grad_norm": 0.6487499758280134, "learning_rate": 7.408511556797714e-06, "loss": 0.1718, "step": 729 }, { "epoch": 0.6809701492537313, "grad_norm": 0.7171870437261106, "learning_rate": 7.402088527249508e-06, "loss": 0.2081, "step": 730 }, { "epoch": 0.6819029850746269, "grad_norm": 0.6468684146965867, "learning_rate": 7.395660340202752e-06, "loss": 0.1392, "step": 731 }, { "epoch": 0.6828358208955224, "grad_norm": 0.6414240881741015, "learning_rate": 7.389227009459335e-06, "loss": 0.1483, "step": 732 }, { "epoch": 0.683768656716418, "grad_norm": 0.696740424266178, "learning_rate": 7.382788548832196e-06, "loss": 0.1909, "step": 733 }, { "epoch": 0.6847014925373134, "grad_norm": 0.6212826335283792, "learning_rate": 7.3763449721452815e-06, "loss": 0.1371, "step": 734 }, { "epoch": 0.6856343283582089, "grad_norm": 0.637805951272767, "learning_rate": 7.369896293233531e-06, "loss": 0.1636, "step": 735 }, { "epoch": 0.6865671641791045, "grad_norm": 0.6581420798365663, "learning_rate": 7.363442525942827e-06, "loss": 0.1424, "step": 736 }, { "epoch": 0.6875, "grad_norm": 0.5899624254002662, "learning_rate": 7.3569836841299905e-06, "loss": 0.1303, "step": 737 }, { "epoch": 0.6884328358208955, "grad_norm": 0.6766974958014371, "learning_rate": 7.350519781662726e-06, "loss": 0.1861, "step": 738 }, { "epoch": 0.6893656716417911, "grad_norm": 0.7246439838666868, "learning_rate": 7.3440508324196126e-06, "loss": 0.1854, "step": 739 }, { "epoch": 0.6902985074626866, "grad_norm": 0.6467442794646233, "learning_rate": 7.3375768502900626e-06, "loss": 0.1571, "step": 740 }, { "epoch": 0.691231343283582, "grad_norm": 0.6569412052934334, "learning_rate": 7.331097849174292e-06, "loss": 0.1575, "step": 741 }, { "epoch": 0.6921641791044776, "grad_norm": 0.6320570750893084, "learning_rate": 7.3246138429832945e-06, "loss": 0.1493, "step": 742 }, { "epoch": 0.6930970149253731, "grad_norm": 0.653514019763124, "learning_rate": 7.3181248456388124e-06, "loss": 0.1458, "step": 743 }, { "epoch": 0.6940298507462687, "grad_norm": 0.6338571555056041, "learning_rate": 7.311630871073301e-06, "loss": 0.1354, "step": 744 }, { "epoch": 0.6949626865671642, "grad_norm": 0.6697972111059602, "learning_rate": 7.305131933229902e-06, "loss": 0.1895, "step": 745 }, { "epoch": 0.6958955223880597, "grad_norm": 0.6675279540030541, "learning_rate": 7.298628046062417e-06, "loss": 0.1536, "step": 746 }, { "epoch": 0.6968283582089553, "grad_norm": 0.613903314802864, "learning_rate": 7.292119223535273e-06, "loss": 0.135, "step": 747 }, { "epoch": 0.6977611940298507, "grad_norm": 0.705774550235947, "learning_rate": 7.2856054796234944e-06, "loss": 0.2094, "step": 748 }, { "epoch": 0.6986940298507462, "grad_norm": 0.6478848515443824, "learning_rate": 7.279086828312666e-06, "loss": 0.1428, "step": 749 }, { "epoch": 0.6996268656716418, "grad_norm": 0.6031339601855319, "learning_rate": 7.272563283598918e-06, "loss": 0.1387, "step": 750 }, { "epoch": 0.7005597014925373, "grad_norm": 0.6149720053005814, "learning_rate": 7.266034859488883e-06, "loss": 0.1412, "step": 751 }, { "epoch": 0.7014925373134329, "grad_norm": 0.7132902743888663, "learning_rate": 7.25950156999967e-06, "loss": 0.1785, "step": 752 }, { "epoch": 0.7024253731343284, "grad_norm": 0.5816084816886081, "learning_rate": 7.252963429158835e-06, "loss": 0.1289, "step": 753 }, { "epoch": 0.7033582089552238, "grad_norm": 0.6623049543048523, "learning_rate": 7.246420451004352e-06, "loss": 0.1594, "step": 754 }, { "epoch": 0.7042910447761194, "grad_norm": 0.6397208897004008, "learning_rate": 7.239872649584574e-06, "loss": 0.1607, "step": 755 }, { "epoch": 0.7052238805970149, "grad_norm": 0.6842731818205605, "learning_rate": 7.23332003895822e-06, "loss": 0.1571, "step": 756 }, { "epoch": 0.7061567164179104, "grad_norm": 0.6012179602342932, "learning_rate": 7.226762633194331e-06, "loss": 0.1255, "step": 757 }, { "epoch": 0.707089552238806, "grad_norm": 0.6869825316913156, "learning_rate": 7.220200446372239e-06, "loss": 0.1789, "step": 758 }, { "epoch": 0.7080223880597015, "grad_norm": 0.7756556233424733, "learning_rate": 7.2136334925815455e-06, "loss": 0.2211, "step": 759 }, { "epoch": 0.7089552238805971, "grad_norm": 0.6976563205003611, "learning_rate": 7.207061785922089e-06, "loss": 0.1994, "step": 760 }, { "epoch": 0.7098880597014925, "grad_norm": 0.6054181920619668, "learning_rate": 7.20048534050391e-06, "loss": 0.1387, "step": 761 }, { "epoch": 0.710820895522388, "grad_norm": 0.6167762110602092, "learning_rate": 7.193904170447223e-06, "loss": 0.1274, "step": 762 }, { "epoch": 0.7117537313432836, "grad_norm": 0.6877866321903767, "learning_rate": 7.187318289882387e-06, "loss": 0.1735, "step": 763 }, { "epoch": 0.7126865671641791, "grad_norm": 0.6476113819619379, "learning_rate": 7.1807277129498774e-06, "loss": 0.1444, "step": 764 }, { "epoch": 0.7136194029850746, "grad_norm": 0.6331644148158897, "learning_rate": 7.17413245380025e-06, "loss": 0.1642, "step": 765 }, { "epoch": 0.7145522388059702, "grad_norm": 0.6686890921739097, "learning_rate": 7.167532526594116e-06, "loss": 0.1713, "step": 766 }, { "epoch": 0.7154850746268657, "grad_norm": 0.6702951739951224, "learning_rate": 7.160927945502109e-06, "loss": 0.1594, "step": 767 }, { "epoch": 0.7164179104477612, "grad_norm": 0.6280677774016477, "learning_rate": 7.1543187247048525e-06, "loss": 0.1447, "step": 768 }, { "epoch": 0.7173507462686567, "grad_norm": 0.7031442401542073, "learning_rate": 7.147704878392935e-06, "loss": 0.2063, "step": 769 }, { "epoch": 0.7182835820895522, "grad_norm": 0.5985136540067333, "learning_rate": 7.141086420766875e-06, "loss": 0.1316, "step": 770 }, { "epoch": 0.7192164179104478, "grad_norm": 0.7011344478243424, "learning_rate": 7.134463366037091e-06, "loss": 0.2091, "step": 771 }, { "epoch": 0.7201492537313433, "grad_norm": 0.6162125306442363, "learning_rate": 7.1278357284238745e-06, "loss": 0.1337, "step": 772 }, { "epoch": 0.7210820895522388, "grad_norm": 0.6009837316923062, "learning_rate": 7.121203522157354e-06, "loss": 0.1326, "step": 773 }, { "epoch": 0.7220149253731343, "grad_norm": 0.6596988485503998, "learning_rate": 7.114566761477468e-06, "loss": 0.1839, "step": 774 }, { "epoch": 0.7229477611940298, "grad_norm": 0.6391592147492244, "learning_rate": 7.107925460633936e-06, "loss": 0.1599, "step": 775 }, { "epoch": 0.7238805970149254, "grad_norm": 0.6715713799750553, "learning_rate": 7.101279633886222e-06, "loss": 0.1796, "step": 776 }, { "epoch": 0.7248134328358209, "grad_norm": 0.650649415617641, "learning_rate": 7.094629295503513e-06, "loss": 0.1774, "step": 777 }, { "epoch": 0.7257462686567164, "grad_norm": 0.626861111985669, "learning_rate": 7.087974459764675e-06, "loss": 0.1504, "step": 778 }, { "epoch": 0.726679104477612, "grad_norm": 0.6313654349761597, "learning_rate": 7.081315140958236e-06, "loss": 0.1346, "step": 779 }, { "epoch": 0.7276119402985075, "grad_norm": 0.6333080301906928, "learning_rate": 7.074651353382349e-06, "loss": 0.1322, "step": 780 }, { "epoch": 0.7285447761194029, "grad_norm": 0.5795525341491343, "learning_rate": 7.067983111344762e-06, "loss": 0.1258, "step": 781 }, { "epoch": 0.7294776119402985, "grad_norm": 0.6684992009138307, "learning_rate": 7.061310429162782e-06, "loss": 0.1761, "step": 782 }, { "epoch": 0.730410447761194, "grad_norm": 0.6842907332172069, "learning_rate": 7.054633321163258e-06, "loss": 0.1795, "step": 783 }, { "epoch": 0.7313432835820896, "grad_norm": 0.6694706180912062, "learning_rate": 7.047951801682533e-06, "loss": 0.141, "step": 784 }, { "epoch": 0.7322761194029851, "grad_norm": 0.6803094501188375, "learning_rate": 7.041265885066428e-06, "loss": 0.167, "step": 785 }, { "epoch": 0.7332089552238806, "grad_norm": 0.6978047118691656, "learning_rate": 7.034575585670205e-06, "loss": 0.1831, "step": 786 }, { "epoch": 0.7341417910447762, "grad_norm": 0.6992709797768903, "learning_rate": 7.027880917858529e-06, "loss": 0.1953, "step": 787 }, { "epoch": 0.7350746268656716, "grad_norm": 0.6578974878594073, "learning_rate": 7.021181896005456e-06, "loss": 0.1673, "step": 788 }, { "epoch": 0.7360074626865671, "grad_norm": 0.6444601500562578, "learning_rate": 7.014478534494378e-06, "loss": 0.1687, "step": 789 }, { "epoch": 0.7369402985074627, "grad_norm": 0.6051345651170129, "learning_rate": 7.007770847718014e-06, "loss": 0.1365, "step": 790 }, { "epoch": 0.7378731343283582, "grad_norm": 0.628319652546469, "learning_rate": 7.001058850078366e-06, "loss": 0.1376, "step": 791 }, { "epoch": 0.7388059701492538, "grad_norm": 0.6191319893086781, "learning_rate": 6.994342555986692e-06, "loss": 0.1452, "step": 792 }, { "epoch": 0.7397388059701493, "grad_norm": 0.6034469090308315, "learning_rate": 6.987621979863475e-06, "loss": 0.1208, "step": 793 }, { "epoch": 0.7406716417910447, "grad_norm": 0.6133022204791418, "learning_rate": 6.9808971361383935e-06, "loss": 0.1558, "step": 794 }, { "epoch": 0.7416044776119403, "grad_norm": 0.7067350649223748, "learning_rate": 6.9741680392502845e-06, "loss": 0.1968, "step": 795 }, { "epoch": 0.7425373134328358, "grad_norm": 0.6019901778940503, "learning_rate": 6.967434703647123e-06, "loss": 0.1253, "step": 796 }, { "epoch": 0.7434701492537313, "grad_norm": 0.7069098467989543, "learning_rate": 6.960697143785979e-06, "loss": 0.1865, "step": 797 }, { "epoch": 0.7444029850746269, "grad_norm": 0.6828020783991653, "learning_rate": 6.953955374132996e-06, "loss": 0.2207, "step": 798 }, { "epoch": 0.7453358208955224, "grad_norm": 0.6439479887962631, "learning_rate": 6.947209409163357e-06, "loss": 0.1522, "step": 799 }, { "epoch": 0.746268656716418, "grad_norm": 0.6788791196377579, "learning_rate": 6.9404592633612486e-06, "loss": 0.1704, "step": 800 }, { "epoch": 0.7472014925373134, "grad_norm": 0.6593748513630283, "learning_rate": 6.93370495121984e-06, "loss": 0.1643, "step": 801 }, { "epoch": 0.7481343283582089, "grad_norm": 0.6693788445939611, "learning_rate": 6.926946487241239e-06, "loss": 0.1639, "step": 802 }, { "epoch": 0.7490671641791045, "grad_norm": 0.7096439868196112, "learning_rate": 6.920183885936473e-06, "loss": 0.1766, "step": 803 }, { "epoch": 0.75, "grad_norm": 0.6229314256085239, "learning_rate": 6.913417161825449e-06, "loss": 0.1608, "step": 804 }, { "epoch": 0.7509328358208955, "grad_norm": 0.6344820604537139, "learning_rate": 6.90664632943693e-06, "loss": 0.1416, "step": 805 }, { "epoch": 0.7518656716417911, "grad_norm": 0.5979797493967405, "learning_rate": 6.899871403308498e-06, "loss": 0.1537, "step": 806 }, { "epoch": 0.7527985074626866, "grad_norm": 0.6659817071614575, "learning_rate": 6.893092397986523e-06, "loss": 0.1714, "step": 807 }, { "epoch": 0.753731343283582, "grad_norm": 0.651999993542865, "learning_rate": 6.886309328026135e-06, "loss": 0.1596, "step": 808 }, { "epoch": 0.7546641791044776, "grad_norm": 0.7035964608126855, "learning_rate": 6.879522207991191e-06, "loss": 0.1805, "step": 809 }, { "epoch": 0.7555970149253731, "grad_norm": 0.6505581026369681, "learning_rate": 6.872731052454243e-06, "loss": 0.1597, "step": 810 }, { "epoch": 0.7565298507462687, "grad_norm": 0.6641476087418174, "learning_rate": 6.865935875996509e-06, "loss": 0.1668, "step": 811 }, { "epoch": 0.7574626865671642, "grad_norm": 0.66827645388351, "learning_rate": 6.85913669320784e-06, "loss": 0.1409, "step": 812 }, { "epoch": 0.7583955223880597, "grad_norm": 0.66511871332777, "learning_rate": 6.852333518686688e-06, "loss": 0.1716, "step": 813 }, { "epoch": 0.7593283582089553, "grad_norm": 0.6102990588726778, "learning_rate": 6.845526367040076e-06, "loss": 0.1472, "step": 814 }, { "epoch": 0.7602611940298507, "grad_norm": 0.6885781911837973, "learning_rate": 6.838715252883567e-06, "loss": 0.1932, "step": 815 }, { "epoch": 0.7611940298507462, "grad_norm": 0.7631518600756162, "learning_rate": 6.831900190841232e-06, "loss": 0.2196, "step": 816 }, { "epoch": 0.7621268656716418, "grad_norm": 0.6293347778856525, "learning_rate": 6.825081195545615e-06, "loss": 0.1714, "step": 817 }, { "epoch": 0.7630597014925373, "grad_norm": 0.6958073554449505, "learning_rate": 6.818258281637709e-06, "loss": 0.1719, "step": 818 }, { "epoch": 0.7639925373134329, "grad_norm": 0.7627000793514362, "learning_rate": 6.811431463766922e-06, "loss": 0.1912, "step": 819 }, { "epoch": 0.7649253731343284, "grad_norm": 0.6265648721870986, "learning_rate": 6.804600756591037e-06, "loss": 0.1424, "step": 820 }, { "epoch": 0.7658582089552238, "grad_norm": 0.678762897285575, "learning_rate": 6.797766174776197e-06, "loss": 0.1904, "step": 821 }, { "epoch": 0.7667910447761194, "grad_norm": 0.6335243539095313, "learning_rate": 6.790927732996855e-06, "loss": 0.1446, "step": 822 }, { "epoch": 0.7677238805970149, "grad_norm": 0.6554738272930144, "learning_rate": 6.78408544593576e-06, "loss": 0.171, "step": 823 }, { "epoch": 0.7686567164179104, "grad_norm": 0.6634498415277502, "learning_rate": 6.777239328283909e-06, "loss": 0.1653, "step": 824 }, { "epoch": 0.769589552238806, "grad_norm": 0.6119276068916335, "learning_rate": 6.770389394740531e-06, "loss": 0.161, "step": 825 }, { "epoch": 0.7705223880597015, "grad_norm": 0.6183957675038846, "learning_rate": 6.763535660013044e-06, "loss": 0.1481, "step": 826 }, { "epoch": 0.7714552238805971, "grad_norm": 0.5973703851118793, "learning_rate": 6.756678138817029e-06, "loss": 0.1308, "step": 827 }, { "epoch": 0.7723880597014925, "grad_norm": 0.6536682055627097, "learning_rate": 6.749816845876196e-06, "loss": 0.1544, "step": 828 }, { "epoch": 0.773320895522388, "grad_norm": 0.6111348581564596, "learning_rate": 6.742951795922355e-06, "loss": 0.143, "step": 829 }, { "epoch": 0.7742537313432836, "grad_norm": 0.6759364246710152, "learning_rate": 6.736083003695378e-06, "loss": 0.1703, "step": 830 }, { "epoch": 0.7751865671641791, "grad_norm": 0.6970838658814377, "learning_rate": 6.729210483943176e-06, "loss": 0.1897, "step": 831 }, { "epoch": 0.7761194029850746, "grad_norm": 0.6444457568834742, "learning_rate": 6.722334251421665e-06, "loss": 0.1392, "step": 832 }, { "epoch": 0.7770522388059702, "grad_norm": 0.6352179393445322, "learning_rate": 6.715454320894728e-06, "loss": 0.1611, "step": 833 }, { "epoch": 0.7779850746268657, "grad_norm": 0.626891297517702, "learning_rate": 6.708570707134192e-06, "loss": 0.1484, "step": 834 }, { "epoch": 0.7789179104477612, "grad_norm": 0.6329212437738361, "learning_rate": 6.701683424919789e-06, "loss": 0.1619, "step": 835 }, { "epoch": 0.7798507462686567, "grad_norm": 0.668930356583675, "learning_rate": 6.6947924890391295e-06, "loss": 0.1641, "step": 836 }, { "epoch": 0.7807835820895522, "grad_norm": 0.7111200506192917, "learning_rate": 6.687897914287667e-06, "loss": 0.1776, "step": 837 }, { "epoch": 0.7817164179104478, "grad_norm": 0.6719133306530883, "learning_rate": 6.680999715468669e-06, "loss": 0.1706, "step": 838 }, { "epoch": 0.7826492537313433, "grad_norm": 0.6911673128561191, "learning_rate": 6.674097907393186e-06, "loss": 0.1989, "step": 839 }, { "epoch": 0.7835820895522388, "grad_norm": 0.6848248249993091, "learning_rate": 6.667192504880016e-06, "loss": 0.174, "step": 840 }, { "epoch": 0.7845149253731343, "grad_norm": 0.6523754063185917, "learning_rate": 6.660283522755674e-06, "loss": 0.1701, "step": 841 }, { "epoch": 0.7854477611940298, "grad_norm": 0.6645826579653784, "learning_rate": 6.653370975854362e-06, "loss": 0.1857, "step": 842 }, { "epoch": 0.7863805970149254, "grad_norm": 0.6705165691584183, "learning_rate": 6.646454879017934e-06, "loss": 0.1756, "step": 843 }, { "epoch": 0.7873134328358209, "grad_norm": 0.6733624140710068, "learning_rate": 6.639535247095868e-06, "loss": 0.1588, "step": 844 }, { "epoch": 0.7882462686567164, "grad_norm": 0.6901618685323484, "learning_rate": 6.632612094945234e-06, "loss": 0.1633, "step": 845 }, { "epoch": 0.789179104477612, "grad_norm": 0.6995034376311401, "learning_rate": 6.625685437430656e-06, "loss": 0.2161, "step": 846 }, { "epoch": 0.7901119402985075, "grad_norm": 0.7166144715139872, "learning_rate": 6.618755289424285e-06, "loss": 0.161, "step": 847 }, { "epoch": 0.7910447761194029, "grad_norm": 0.6322837187781529, "learning_rate": 6.611821665805769e-06, "loss": 0.1513, "step": 848 }, { "epoch": 0.7919776119402985, "grad_norm": 0.6931876938593144, "learning_rate": 6.604884581462219e-06, "loss": 0.1861, "step": 849 }, { "epoch": 0.792910447761194, "grad_norm": 0.6161666915378519, "learning_rate": 6.597944051288169e-06, "loss": 0.1554, "step": 850 }, { "epoch": 0.7938432835820896, "grad_norm": 0.628299656016581, "learning_rate": 6.5910000901855606e-06, "loss": 0.1318, "step": 851 }, { "epoch": 0.7947761194029851, "grad_norm": 0.6310512721546443, "learning_rate": 6.5840527130637e-06, "loss": 0.121, "step": 852 }, { "epoch": 0.7957089552238806, "grad_norm": 0.6545675463474354, "learning_rate": 6.577101934839222e-06, "loss": 0.1483, "step": 853 }, { "epoch": 0.7966417910447762, "grad_norm": 0.6616189556467047, "learning_rate": 6.570147770436071e-06, "loss": 0.1553, "step": 854 }, { "epoch": 0.7975746268656716, "grad_norm": 0.6885986978537596, "learning_rate": 6.56319023478546e-06, "loss": 0.1775, "step": 855 }, { "epoch": 0.7985074626865671, "grad_norm": 0.6266770009080371, "learning_rate": 6.556229342825835e-06, "loss": 0.1467, "step": 856 }, { "epoch": 0.7994402985074627, "grad_norm": 0.6248986672782534, "learning_rate": 6.549265109502856e-06, "loss": 0.14, "step": 857 }, { "epoch": 0.8003731343283582, "grad_norm": 0.6310755665621881, "learning_rate": 6.542297549769353e-06, "loss": 0.1504, "step": 858 }, { "epoch": 0.8013059701492538, "grad_norm": 0.6263689054229051, "learning_rate": 6.5353266785852976e-06, "loss": 0.1528, "step": 859 }, { "epoch": 0.8022388059701493, "grad_norm": 0.6190656982671856, "learning_rate": 6.528352510917774e-06, "loss": 0.1455, "step": 860 }, { "epoch": 0.8031716417910447, "grad_norm": 0.6614782844428062, "learning_rate": 6.521375061740945e-06, "loss": 0.1709, "step": 861 }, { "epoch": 0.8041044776119403, "grad_norm": 0.6300333472335146, "learning_rate": 6.514394346036013e-06, "loss": 0.1644, "step": 862 }, { "epoch": 0.8050373134328358, "grad_norm": 0.6635928030159733, "learning_rate": 6.507410378791198e-06, "loss": 0.1514, "step": 863 }, { "epoch": 0.8059701492537313, "grad_norm": 0.6660139646149384, "learning_rate": 6.500423175001705e-06, "loss": 0.1627, "step": 864 }, { "epoch": 0.8069029850746269, "grad_norm": 0.6452273202690014, "learning_rate": 6.493432749669682e-06, "loss": 0.1351, "step": 865 }, { "epoch": 0.8078358208955224, "grad_norm": 0.6584401266450833, "learning_rate": 6.486439117804195e-06, "loss": 0.1354, "step": 866 }, { "epoch": 0.808768656716418, "grad_norm": 0.6037832710435859, "learning_rate": 6.479442294421199e-06, "loss": 0.1304, "step": 867 }, { "epoch": 0.8097014925373134, "grad_norm": 0.6132682265514223, "learning_rate": 6.472442294543497e-06, "loss": 0.14, "step": 868 }, { "epoch": 0.8106343283582089, "grad_norm": 0.6981872351887359, "learning_rate": 6.465439133200715e-06, "loss": 0.1536, "step": 869 }, { "epoch": 0.8115671641791045, "grad_norm": 0.7063385478962971, "learning_rate": 6.458432825429264e-06, "loss": 0.1687, "step": 870 }, { "epoch": 0.8125, "grad_norm": 0.6730802402247441, "learning_rate": 6.451423386272312e-06, "loss": 0.1634, "step": 871 }, { "epoch": 0.8134328358208955, "grad_norm": 0.6541346990138588, "learning_rate": 6.444410830779753e-06, "loss": 0.1497, "step": 872 }, { "epoch": 0.8143656716417911, "grad_norm": 0.680758817164705, "learning_rate": 6.437395174008169e-06, "loss": 0.1507, "step": 873 }, { "epoch": 0.8152985074626866, "grad_norm": 0.583781690655216, "learning_rate": 6.4303764310208015e-06, "loss": 0.1242, "step": 874 }, { "epoch": 0.816231343283582, "grad_norm": 0.668198643055519, "learning_rate": 6.4233546168875185e-06, "loss": 0.1777, "step": 875 }, { "epoch": 0.8171641791044776, "grad_norm": 0.6457759755119228, "learning_rate": 6.4163297466847795e-06, "loss": 0.1604, "step": 876 }, { "epoch": 0.8180970149253731, "grad_norm": 0.6537544547758258, "learning_rate": 6.409301835495611e-06, "loss": 0.1799, "step": 877 }, { "epoch": 0.8190298507462687, "grad_norm": 0.6659622725837913, "learning_rate": 6.402270898409565e-06, "loss": 0.1477, "step": 878 }, { "epoch": 0.8199626865671642, "grad_norm": 0.5851546918552865, "learning_rate": 6.395236950522691e-06, "loss": 0.1382, "step": 879 }, { "epoch": 0.8208955223880597, "grad_norm": 0.668557386866005, "learning_rate": 6.388200006937503e-06, "loss": 0.1662, "step": 880 }, { "epoch": 0.8218283582089553, "grad_norm": 0.676034206198378, "learning_rate": 6.381160082762949e-06, "loss": 0.1777, "step": 881 }, { "epoch": 0.8227611940298507, "grad_norm": 0.6495874193987915, "learning_rate": 6.374117193114373e-06, "loss": 0.1615, "step": 882 }, { "epoch": 0.8236940298507462, "grad_norm": 0.6459261340201581, "learning_rate": 6.3670713531134865e-06, "loss": 0.1767, "step": 883 }, { "epoch": 0.8246268656716418, "grad_norm": 0.6421258748104782, "learning_rate": 6.3600225778883395e-06, "loss": 0.1693, "step": 884 }, { "epoch": 0.8255597014925373, "grad_norm": 0.6545974757064171, "learning_rate": 6.352970882573283e-06, "loss": 0.1737, "step": 885 }, { "epoch": 0.8264925373134329, "grad_norm": 0.6493724579334709, "learning_rate": 6.3459162823089325e-06, "loss": 0.1556, "step": 886 }, { "epoch": 0.8274253731343284, "grad_norm": 0.6676608360691262, "learning_rate": 6.338858792242147e-06, "loss": 0.1763, "step": 887 }, { "epoch": 0.8283582089552238, "grad_norm": 0.6049839910738554, "learning_rate": 6.33179842752599e-06, "loss": 0.1257, "step": 888 }, { "epoch": 0.8292910447761194, "grad_norm": 0.6428611552705268, "learning_rate": 6.324735203319691e-06, "loss": 0.1574, "step": 889 }, { "epoch": 0.8302238805970149, "grad_norm": 0.6586742982563667, "learning_rate": 6.317669134788625e-06, "loss": 0.1522, "step": 890 }, { "epoch": 0.8311567164179104, "grad_norm": 0.7110106799064964, "learning_rate": 6.3106002371042716e-06, "loss": 0.1872, "step": 891 }, { "epoch": 0.832089552238806, "grad_norm": 0.6719841907045824, "learning_rate": 6.303528525444185e-06, "loss": 0.1475, "step": 892 }, { "epoch": 0.8330223880597015, "grad_norm": 0.6166174883666973, "learning_rate": 6.296454014991962e-06, "loss": 0.1477, "step": 893 }, { "epoch": 0.8339552238805971, "grad_norm": 0.6566620847273986, "learning_rate": 6.289376720937208e-06, "loss": 0.1511, "step": 894 }, { "epoch": 0.8348880597014925, "grad_norm": 0.6716376481618703, "learning_rate": 6.282296658475508e-06, "loss": 0.1662, "step": 895 }, { "epoch": 0.835820895522388, "grad_norm": 0.6869208670866339, "learning_rate": 6.275213842808383e-06, "loss": 0.174, "step": 896 }, { "epoch": 0.8367537313432836, "grad_norm": 0.683558060471553, "learning_rate": 6.268128289143274e-06, "loss": 0.1849, "step": 897 }, { "epoch": 0.8376865671641791, "grad_norm": 0.7279594066367395, "learning_rate": 6.261040012693498e-06, "loss": 0.183, "step": 898 }, { "epoch": 0.8386194029850746, "grad_norm": 0.6049401294445522, "learning_rate": 6.253949028678214e-06, "loss": 0.1298, "step": 899 }, { "epoch": 0.8395522388059702, "grad_norm": 0.651668225479089, "learning_rate": 6.246855352322403e-06, "loss": 0.1583, "step": 900 }, { "epoch": 0.8404850746268657, "grad_norm": 0.6416615840983867, "learning_rate": 6.2397589988568175e-06, "loss": 0.1376, "step": 901 }, { "epoch": 0.8414179104477612, "grad_norm": 0.7147095039140896, "learning_rate": 6.232659983517964e-06, "loss": 0.1876, "step": 902 }, { "epoch": 0.8423507462686567, "grad_norm": 0.6531827460091617, "learning_rate": 6.22555832154806e-06, "loss": 0.1357, "step": 903 }, { "epoch": 0.8432835820895522, "grad_norm": 0.6618043559108994, "learning_rate": 6.21845402819501e-06, "loss": 0.1569, "step": 904 }, { "epoch": 0.8442164179104478, "grad_norm": 0.6992617718392852, "learning_rate": 6.211347118712365e-06, "loss": 0.1919, "step": 905 }, { "epoch": 0.8451492537313433, "grad_norm": 0.6395344796658351, "learning_rate": 6.204237608359296e-06, "loss": 0.1508, "step": 906 }, { "epoch": 0.8460820895522388, "grad_norm": 0.6247571573975099, "learning_rate": 6.197125512400555e-06, "loss": 0.1375, "step": 907 }, { "epoch": 0.8470149253731343, "grad_norm": 0.6041286044681576, "learning_rate": 6.190010846106446e-06, "loss": 0.1487, "step": 908 }, { "epoch": 0.8479477611940298, "grad_norm": 0.7116785372539952, "learning_rate": 6.182893624752796e-06, "loss": 0.1913, "step": 909 }, { "epoch": 0.8488805970149254, "grad_norm": 0.6655756346279249, "learning_rate": 6.1757738636209115e-06, "loss": 0.167, "step": 910 }, { "epoch": 0.8498134328358209, "grad_norm": 0.5939016058200892, "learning_rate": 6.168651577997558e-06, "loss": 0.1295, "step": 911 }, { "epoch": 0.8507462686567164, "grad_norm": 0.6970371773025258, "learning_rate": 6.161526783174917e-06, "loss": 0.1704, "step": 912 }, { "epoch": 0.851679104477612, "grad_norm": 0.6290951167460345, "learning_rate": 6.154399494450559e-06, "loss": 0.1564, "step": 913 }, { "epoch": 0.8526119402985075, "grad_norm": 0.6587625637787665, "learning_rate": 6.14726972712741e-06, "loss": 0.1656, "step": 914 }, { "epoch": 0.8535447761194029, "grad_norm": 0.6380824784402694, "learning_rate": 6.140137496513718e-06, "loss": 0.1404, "step": 915 }, { "epoch": 0.8544776119402985, "grad_norm": 0.6451494052413029, "learning_rate": 6.1330028179230185e-06, "loss": 0.1542, "step": 916 }, { "epoch": 0.855410447761194, "grad_norm": 0.5764267797701579, "learning_rate": 6.125865706674103e-06, "loss": 0.12, "step": 917 }, { "epoch": 0.8563432835820896, "grad_norm": 0.6214787903515285, "learning_rate": 6.1187261780909835e-06, "loss": 0.1539, "step": 918 }, { "epoch": 0.8572761194029851, "grad_norm": 0.6454981166080308, "learning_rate": 6.111584247502871e-06, "loss": 0.1674, "step": 919 }, { "epoch": 0.8582089552238806, "grad_norm": 0.6574298864670908, "learning_rate": 6.104439930244125e-06, "loss": 0.1285, "step": 920 }, { "epoch": 0.8591417910447762, "grad_norm": 0.6882043914824859, "learning_rate": 6.0972932416542326e-06, "loss": 0.1729, "step": 921 }, { "epoch": 0.8600746268656716, "grad_norm": 0.6366434691020015, "learning_rate": 6.090144197077774e-06, "loss": 0.1772, "step": 922 }, { "epoch": 0.8610074626865671, "grad_norm": 0.643920952429573, "learning_rate": 6.082992811864385e-06, "loss": 0.156, "step": 923 }, { "epoch": 0.8619402985074627, "grad_norm": 0.6354667751423755, "learning_rate": 6.075839101368728e-06, "loss": 0.1511, "step": 924 }, { "epoch": 0.8628731343283582, "grad_norm": 0.6871530371089948, "learning_rate": 6.068683080950458e-06, "loss": 0.1984, "step": 925 }, { "epoch": 0.8638059701492538, "grad_norm": 0.6607028991265234, "learning_rate": 6.061524765974191e-06, "loss": 0.1555, "step": 926 }, { "epoch": 0.8647388059701493, "grad_norm": 0.6598179881530127, "learning_rate": 6.054364171809467e-06, "loss": 0.1375, "step": 927 }, { "epoch": 0.8656716417910447, "grad_norm": 0.6793163389991403, "learning_rate": 6.047201313830724e-06, "loss": 0.1737, "step": 928 }, { "epoch": 0.8666044776119403, "grad_norm": 0.6964832269431211, "learning_rate": 6.040036207417252e-06, "loss": 0.1798, "step": 929 }, { "epoch": 0.8675373134328358, "grad_norm": 0.6243572543003991, "learning_rate": 6.032868867953181e-06, "loss": 0.1552, "step": 930 }, { "epoch": 0.8684701492537313, "grad_norm": 0.6700761492460738, "learning_rate": 6.025699310827423e-06, "loss": 0.1714, "step": 931 }, { "epoch": 0.8694029850746269, "grad_norm": 0.6329032543774508, "learning_rate": 6.01852755143366e-06, "loss": 0.126, "step": 932 }, { "epoch": 0.8703358208955224, "grad_norm": 0.6641047777754848, "learning_rate": 6.011353605170303e-06, "loss": 0.1616, "step": 933 }, { "epoch": 0.871268656716418, "grad_norm": 0.6598124928665948, "learning_rate": 6.004177487440448e-06, "loss": 0.151, "step": 934 }, { "epoch": 0.8722014925373134, "grad_norm": 0.6209217935216808, "learning_rate": 5.996999213651866e-06, "loss": 0.149, "step": 935 }, { "epoch": 0.8731343283582089, "grad_norm": 0.6677024331611352, "learning_rate": 5.98981879921695e-06, "loss": 0.1563, "step": 936 }, { "epoch": 0.8740671641791045, "grad_norm": 0.6243961319811943, "learning_rate": 5.982636259552691e-06, "loss": 0.1516, "step": 937 }, { "epoch": 0.875, "grad_norm": 0.5841565891923488, "learning_rate": 5.975451610080643e-06, "loss": 0.133, "step": 938 }, { "epoch": 0.8759328358208955, "grad_norm": 0.6363581025691273, "learning_rate": 5.968264866226888e-06, "loss": 0.1482, "step": 939 }, { "epoch": 0.8768656716417911, "grad_norm": 0.7074597980519941, "learning_rate": 5.961076043422011e-06, "loss": 0.1709, "step": 940 }, { "epoch": 0.8777985074626866, "grad_norm": 0.7014916004205767, "learning_rate": 5.953885157101054e-06, "loss": 0.2082, "step": 941 }, { "epoch": 0.878731343283582, "grad_norm": 0.6522925481558587, "learning_rate": 5.9466922227034915e-06, "loss": 0.1628, "step": 942 }, { "epoch": 0.8796641791044776, "grad_norm": 0.7162690694645696, "learning_rate": 5.939497255673197e-06, "loss": 0.1861, "step": 943 }, { "epoch": 0.8805970149253731, "grad_norm": 0.683816239262944, "learning_rate": 5.932300271458406e-06, "loss": 0.1453, "step": 944 }, { "epoch": 0.8815298507462687, "grad_norm": 0.649777422045162, "learning_rate": 5.925101285511687e-06, "loss": 0.1342, "step": 945 }, { "epoch": 0.8824626865671642, "grad_norm": 0.6164064931621339, "learning_rate": 5.9179003132899075e-06, "loss": 0.1561, "step": 946 }, { "epoch": 0.8833955223880597, "grad_norm": 0.6281310154028873, "learning_rate": 5.910697370254195e-06, "loss": 0.1602, "step": 947 }, { "epoch": 0.8843283582089553, "grad_norm": 0.6420613888738308, "learning_rate": 5.90349247186991e-06, "loss": 0.1445, "step": 948 }, { "epoch": 0.8852611940298507, "grad_norm": 0.6164248908851191, "learning_rate": 5.8962856336066175e-06, "loss": 0.1412, "step": 949 }, { "epoch": 0.8861940298507462, "grad_norm": 0.707206598513977, "learning_rate": 5.889076870938041e-06, "loss": 0.1808, "step": 950 }, { "epoch": 0.8871268656716418, "grad_norm": 0.6686628088315595, "learning_rate": 5.881866199342035e-06, "loss": 0.1903, "step": 951 }, { "epoch": 0.8880597014925373, "grad_norm": 0.6524990754759882, "learning_rate": 5.874653634300555e-06, "loss": 0.1485, "step": 952 }, { "epoch": 0.8889925373134329, "grad_norm": 0.6812412226725866, "learning_rate": 5.867439191299629e-06, "loss": 0.1865, "step": 953 }, { "epoch": 0.8899253731343284, "grad_norm": 0.6696875154391545, "learning_rate": 5.860222885829302e-06, "loss": 0.1738, "step": 954 }, { "epoch": 0.8908582089552238, "grad_norm": 0.686834394536641, "learning_rate": 5.853004733383631e-06, "loss": 0.1796, "step": 955 }, { "epoch": 0.8917910447761194, "grad_norm": 0.6289904088954222, "learning_rate": 5.845784749460632e-06, "loss": 0.1553, "step": 956 }, { "epoch": 0.8927238805970149, "grad_norm": 0.6460320678165847, "learning_rate": 5.838562949562257e-06, "loss": 0.1735, "step": 957 }, { "epoch": 0.8936567164179104, "grad_norm": 0.6367716823836521, "learning_rate": 5.831339349194352e-06, "loss": 0.1528, "step": 958 }, { "epoch": 0.894589552238806, "grad_norm": 0.6053082787080515, "learning_rate": 5.824113963866635e-06, "loss": 0.1258, "step": 959 }, { "epoch": 0.8955223880597015, "grad_norm": 0.6563429416704861, "learning_rate": 5.816886809092651e-06, "loss": 0.1488, "step": 960 }, { "epoch": 0.8964552238805971, "grad_norm": 0.6139789210109271, "learning_rate": 5.809657900389749e-06, "loss": 0.1378, "step": 961 }, { "epoch": 0.8973880597014925, "grad_norm": 0.6408348268476023, "learning_rate": 5.802427253279042e-06, "loss": 0.1418, "step": 962 }, { "epoch": 0.898320895522388, "grad_norm": 0.6785166404224208, "learning_rate": 5.795194883285371e-06, "loss": 0.1754, "step": 963 }, { "epoch": 0.8992537313432836, "grad_norm": 0.6363322075161706, "learning_rate": 5.787960805937283e-06, "loss": 0.1472, "step": 964 }, { "epoch": 0.9001865671641791, "grad_norm": 0.6287667770589349, "learning_rate": 5.780725036766988e-06, "loss": 0.1482, "step": 965 }, { "epoch": 0.9011194029850746, "grad_norm": 0.6323926031731353, "learning_rate": 5.773487591310329e-06, "loss": 0.1464, "step": 966 }, { "epoch": 0.9020522388059702, "grad_norm": 0.5707176731549668, "learning_rate": 5.7662484851067435e-06, "loss": 0.1214, "step": 967 }, { "epoch": 0.9029850746268657, "grad_norm": 0.6295522748184718, "learning_rate": 5.759007733699245e-06, "loss": 0.1653, "step": 968 }, { "epoch": 0.9039179104477612, "grad_norm": 0.7149477891939764, "learning_rate": 5.751765352634369e-06, "loss": 0.1994, "step": 969 }, { "epoch": 0.9048507462686567, "grad_norm": 0.6009826286855905, "learning_rate": 5.7445213574621565e-06, "loss": 0.1414, "step": 970 }, { "epoch": 0.9057835820895522, "grad_norm": 0.62089407644033, "learning_rate": 5.73727576373611e-06, "loss": 0.137, "step": 971 }, { "epoch": 0.9067164179104478, "grad_norm": 0.6990986179584189, "learning_rate": 5.730028587013168e-06, "loss": 0.1744, "step": 972 }, { "epoch": 0.9076492537313433, "grad_norm": 0.6271630229992479, "learning_rate": 5.722779842853665e-06, "loss": 0.151, "step": 973 }, { "epoch": 0.9085820895522388, "grad_norm": 0.6047483280405747, "learning_rate": 5.715529546821303e-06, "loss": 0.1314, "step": 974 }, { "epoch": 0.9095149253731343, "grad_norm": 0.6706986802485786, "learning_rate": 5.708277714483114e-06, "loss": 0.1816, "step": 975 }, { "epoch": 0.9104477611940298, "grad_norm": 0.5718449691354516, "learning_rate": 5.701024361409431e-06, "loss": 0.1195, "step": 976 }, { "epoch": 0.9113805970149254, "grad_norm": 0.6715148327443069, "learning_rate": 5.693769503173847e-06, "loss": 0.1703, "step": 977 }, { "epoch": 0.9123134328358209, "grad_norm": 0.6728800105187092, "learning_rate": 5.6865131553531925e-06, "loss": 0.2004, "step": 978 }, { "epoch": 0.9132462686567164, "grad_norm": 0.6115075748062551, "learning_rate": 5.679255333527498e-06, "loss": 0.1424, "step": 979 }, { "epoch": 0.914179104477612, "grad_norm": 0.6490086269997578, "learning_rate": 5.671996053279949e-06, "loss": 0.1581, "step": 980 }, { "epoch": 0.9151119402985075, "grad_norm": 0.658615061579246, "learning_rate": 5.664735330196871e-06, "loss": 0.161, "step": 981 }, { "epoch": 0.9160447761194029, "grad_norm": 0.6667853064931762, "learning_rate": 5.657473179867686e-06, "loss": 0.18, "step": 982 }, { "epoch": 0.9169776119402985, "grad_norm": 0.6264433279063906, "learning_rate": 5.6502096178848786e-06, "loss": 0.1282, "step": 983 }, { "epoch": 0.917910447761194, "grad_norm": 0.6509350903508789, "learning_rate": 5.642944659843962e-06, "loss": 0.1549, "step": 984 }, { "epoch": 0.9188432835820896, "grad_norm": 0.664636252067674, "learning_rate": 5.635678321343453e-06, "loss": 0.1498, "step": 985 }, { "epoch": 0.9197761194029851, "grad_norm": 0.6389558224864558, "learning_rate": 5.628410617984828e-06, "loss": 0.1415, "step": 986 }, { "epoch": 0.9207089552238806, "grad_norm": 0.6431263168389677, "learning_rate": 5.6211415653724965e-06, "loss": 0.1494, "step": 987 }, { "epoch": 0.9216417910447762, "grad_norm": 0.6465652687143928, "learning_rate": 5.613871179113761e-06, "loss": 0.1588, "step": 988 }, { "epoch": 0.9225746268656716, "grad_norm": 0.6967638561816738, "learning_rate": 5.606599474818793e-06, "loss": 0.1825, "step": 989 }, { "epoch": 0.9235074626865671, "grad_norm": 0.6637008784193407, "learning_rate": 5.5993264681005875e-06, "loss": 0.1615, "step": 990 }, { "epoch": 0.9244402985074627, "grad_norm": 0.6697373517600586, "learning_rate": 5.592052174574942e-06, "loss": 0.1553, "step": 991 }, { "epoch": 0.9253731343283582, "grad_norm": 0.5870279395078305, "learning_rate": 5.584776609860414e-06, "loss": 0.1161, "step": 992 }, { "epoch": 0.9263059701492538, "grad_norm": 0.631872774108136, "learning_rate": 5.5774997895782875e-06, "loss": 0.1518, "step": 993 }, { "epoch": 0.9272388059701493, "grad_norm": 0.6185158979695748, "learning_rate": 5.570221729352549e-06, "loss": 0.1692, "step": 994 }, { "epoch": 0.9281716417910447, "grad_norm": 0.6951772180234128, "learning_rate": 5.562942444809842e-06, "loss": 0.1841, "step": 995 }, { "epoch": 0.9291044776119403, "grad_norm": 0.6703495119444423, "learning_rate": 5.555661951579443e-06, "loss": 0.162, "step": 996 }, { "epoch": 0.9300373134328358, "grad_norm": 0.6280141280257645, "learning_rate": 5.5483802652932165e-06, "loss": 0.1533, "step": 997 }, { "epoch": 0.9309701492537313, "grad_norm": 0.5826001928846871, "learning_rate": 5.541097401585596e-06, "loss": 0.1293, "step": 998 }, { "epoch": 0.9319029850746269, "grad_norm": 0.5586984711716011, "learning_rate": 5.53381337609354e-06, "loss": 0.1259, "step": 999 }, { "epoch": 0.9328358208955224, "grad_norm": 0.6017947961229149, "learning_rate": 5.5265282044565005e-06, "loss": 0.1282, "step": 1000 }, { "epoch": 0.9328358208955224, "eval_loss": 0.1615498661994934, "eval_runtime": 4.2081, "eval_samples_per_second": 20.675, "eval_steps_per_second": 5.228, "step": 1000 }, { "epoch": 0.933768656716418, "grad_norm": 0.5718055913082273, "learning_rate": 5.519241902316392e-06, "loss": 0.1316, "step": 1001 }, { "epoch": 0.9347014925373134, "grad_norm": 0.6814255753813562, "learning_rate": 5.511954485317558e-06, "loss": 0.1834, "step": 1002 }, { "epoch": 0.9356343283582089, "grad_norm": 0.6217528796472293, "learning_rate": 5.504665969106731e-06, "loss": 0.1379, "step": 1003 }, { "epoch": 0.9365671641791045, "grad_norm": 0.5823147160814418, "learning_rate": 5.497376369333005e-06, "loss": 0.1412, "step": 1004 }, { "epoch": 0.9375, "grad_norm": 0.6575429795763965, "learning_rate": 5.490085701647805e-06, "loss": 0.1388, "step": 1005 }, { "epoch": 0.9384328358208955, "grad_norm": 0.6145246742728379, "learning_rate": 5.482793981704842e-06, "loss": 0.1107, "step": 1006 }, { "epoch": 0.9393656716417911, "grad_norm": 0.6526638412794066, "learning_rate": 5.475501225160092e-06, "loss": 0.1581, "step": 1007 }, { "epoch": 0.9402985074626866, "grad_norm": 0.6686862227894702, "learning_rate": 5.468207447671755e-06, "loss": 0.1716, "step": 1008 }, { "epoch": 0.941231343283582, "grad_norm": 0.6321981089714285, "learning_rate": 5.4609126649002206e-06, "loss": 0.1456, "step": 1009 }, { "epoch": 0.9421641791044776, "grad_norm": 0.652632294015238, "learning_rate": 5.45361689250804e-06, "loss": 0.1487, "step": 1010 }, { "epoch": 0.9430970149253731, "grad_norm": 0.6254570670548221, "learning_rate": 5.446320146159888e-06, "loss": 0.1521, "step": 1011 }, { "epoch": 0.9440298507462687, "grad_norm": 0.6540598060852532, "learning_rate": 5.43902244152253e-06, "loss": 0.146, "step": 1012 }, { "epoch": 0.9449626865671642, "grad_norm": 0.6149733185153631, "learning_rate": 5.431723794264789e-06, "loss": 0.133, "step": 1013 }, { "epoch": 0.9458955223880597, "grad_norm": 0.602931095003715, "learning_rate": 5.424424220057514e-06, "loss": 0.1404, "step": 1014 }, { "epoch": 0.9468283582089553, "grad_norm": 0.7077218077624783, "learning_rate": 5.417123734573541e-06, "loss": 0.1711, "step": 1015 }, { "epoch": 0.9477611940298507, "grad_norm": 0.714477722488728, "learning_rate": 5.409822353487666e-06, "loss": 0.1826, "step": 1016 }, { "epoch": 0.9486940298507462, "grad_norm": 0.629887365608587, "learning_rate": 5.402520092476604e-06, "loss": 0.149, "step": 1017 }, { "epoch": 0.9496268656716418, "grad_norm": 0.6465083247012406, "learning_rate": 5.395216967218961e-06, "loss": 0.147, "step": 1018 }, { "epoch": 0.9505597014925373, "grad_norm": 0.6753984859917674, "learning_rate": 5.387912993395203e-06, "loss": 0.1705, "step": 1019 }, { "epoch": 0.9514925373134329, "grad_norm": 0.6841326177768018, "learning_rate": 5.38060818668761e-06, "loss": 0.173, "step": 1020 }, { "epoch": 0.9524253731343284, "grad_norm": 0.6065429835879118, "learning_rate": 5.373302562780256e-06, "loss": 0.1374, "step": 1021 }, { "epoch": 0.9533582089552238, "grad_norm": 0.6639345475762953, "learning_rate": 5.365996137358969e-06, "loss": 0.1534, "step": 1022 }, { "epoch": 0.9542910447761194, "grad_norm": 0.6293078144730327, "learning_rate": 5.358688926111293e-06, "loss": 0.1389, "step": 1023 }, { "epoch": 0.9552238805970149, "grad_norm": 0.6145698011074788, "learning_rate": 5.351380944726465e-06, "loss": 0.1639, "step": 1024 }, { "epoch": 0.9561567164179104, "grad_norm": 0.7156975471246391, "learning_rate": 5.344072208895376e-06, "loss": 0.1961, "step": 1025 }, { "epoch": 0.957089552238806, "grad_norm": 0.6497381256316531, "learning_rate": 5.33676273431053e-06, "loss": 0.161, "step": 1026 }, { "epoch": 0.9580223880597015, "grad_norm": 0.6653820230766111, "learning_rate": 5.329452536666025e-06, "loss": 0.1566, "step": 1027 }, { "epoch": 0.9589552238805971, "grad_norm": 0.6770803937602515, "learning_rate": 5.322141631657507e-06, "loss": 0.1795, "step": 1028 }, { "epoch": 0.9598880597014925, "grad_norm": 0.6638286206553158, "learning_rate": 5.314830034982142e-06, "loss": 0.1643, "step": 1029 }, { "epoch": 0.960820895522388, "grad_norm": 0.6457451187187653, "learning_rate": 5.30751776233858e-06, "loss": 0.1636, "step": 1030 }, { "epoch": 0.9617537313432836, "grad_norm": 0.6449507218899554, "learning_rate": 5.300204829426923e-06, "loss": 0.1592, "step": 1031 }, { "epoch": 0.9626865671641791, "grad_norm": 0.6425447917016437, "learning_rate": 5.292891251948694e-06, "loss": 0.1592, "step": 1032 }, { "epoch": 0.9636194029850746, "grad_norm": 0.6422303695154186, "learning_rate": 5.2855770456067936e-06, "loss": 0.1402, "step": 1033 }, { "epoch": 0.9645522388059702, "grad_norm": 0.628524387802428, "learning_rate": 5.278262226105476e-06, "loss": 0.1433, "step": 1034 }, { "epoch": 0.9654850746268657, "grad_norm": 0.6604686623686596, "learning_rate": 5.270946809150315e-06, "loss": 0.1616, "step": 1035 }, { "epoch": 0.9664179104477612, "grad_norm": 0.6112609577558104, "learning_rate": 5.263630810448161e-06, "loss": 0.1535, "step": 1036 }, { "epoch": 0.9673507462686567, "grad_norm": 0.6790722707545481, "learning_rate": 5.256314245707118e-06, "loss": 0.172, "step": 1037 }, { "epoch": 0.9682835820895522, "grad_norm": 0.6309138119517526, "learning_rate": 5.2489971306365025e-06, "loss": 0.1406, "step": 1038 }, { "epoch": 0.9692164179104478, "grad_norm": 0.6162336981396919, "learning_rate": 5.2416794809468145e-06, "loss": 0.1309, "step": 1039 }, { "epoch": 0.9701492537313433, "grad_norm": 0.6298712111544099, "learning_rate": 5.234361312349701e-06, "loss": 0.1551, "step": 1040 }, { "epoch": 0.9710820895522388, "grad_norm": 0.6422763001962395, "learning_rate": 5.227042640557924e-06, "loss": 0.1527, "step": 1041 }, { "epoch": 0.9720149253731343, "grad_norm": 0.5934675281952629, "learning_rate": 5.219723481285326e-06, "loss": 0.1241, "step": 1042 }, { "epoch": 0.9729477611940298, "grad_norm": 0.6644367669541928, "learning_rate": 5.212403850246794e-06, "loss": 0.1621, "step": 1043 }, { "epoch": 0.9738805970149254, "grad_norm": 0.6568363049342306, "learning_rate": 5.205083763158228e-06, "loss": 0.177, "step": 1044 }, { "epoch": 0.9748134328358209, "grad_norm": 0.6110217519268158, "learning_rate": 5.197763235736512e-06, "loss": 0.1377, "step": 1045 }, { "epoch": 0.9757462686567164, "grad_norm": 0.6377096237664648, "learning_rate": 5.190442283699472e-06, "loss": 0.1475, "step": 1046 }, { "epoch": 0.976679104477612, "grad_norm": 0.6418612742304864, "learning_rate": 5.183120922765842e-06, "loss": 0.146, "step": 1047 }, { "epoch": 0.9776119402985075, "grad_norm": 0.5942069509977499, "learning_rate": 5.175799168655241e-06, "loss": 0.1397, "step": 1048 }, { "epoch": 0.9785447761194029, "grad_norm": 0.698870903377465, "learning_rate": 5.168477037088129e-06, "loss": 0.1885, "step": 1049 }, { "epoch": 0.9794776119402985, "grad_norm": 0.6296076877604461, "learning_rate": 5.161154543785773e-06, "loss": 0.1339, "step": 1050 }, { "epoch": 0.980410447761194, "grad_norm": 0.6506865357090094, "learning_rate": 5.153831704470224e-06, "loss": 0.1616, "step": 1051 }, { "epoch": 0.9813432835820896, "grad_norm": 0.6330150990495571, "learning_rate": 5.146508534864267e-06, "loss": 0.1412, "step": 1052 }, { "epoch": 0.9822761194029851, "grad_norm": 0.6235711220815392, "learning_rate": 5.1391850506914055e-06, "loss": 0.1527, "step": 1053 }, { "epoch": 0.9832089552238806, "grad_norm": 0.6304661752326972, "learning_rate": 5.131861267675813e-06, "loss": 0.1484, "step": 1054 }, { "epoch": 0.9841417910447762, "grad_norm": 0.6693819047596455, "learning_rate": 5.124537201542303e-06, "loss": 0.1647, "step": 1055 }, { "epoch": 0.9850746268656716, "grad_norm": 0.7744141805828401, "learning_rate": 5.117212868016303e-06, "loss": 0.1441, "step": 1056 }, { "epoch": 0.9860074626865671, "grad_norm": 0.6548139493358044, "learning_rate": 5.109888282823809e-06, "loss": 0.1686, "step": 1057 }, { "epoch": 0.9869402985074627, "grad_norm": 0.6591653152693914, "learning_rate": 5.10256346169136e-06, "loss": 0.1627, "step": 1058 }, { "epoch": 0.9878731343283582, "grad_norm": 0.7173487440613779, "learning_rate": 5.095238420346e-06, "loss": 0.2048, "step": 1059 }, { "epoch": 0.9888059701492538, "grad_norm": 0.6199828519251331, "learning_rate": 5.087913174515247e-06, "loss": 0.1245, "step": 1060 }, { "epoch": 0.9897388059701493, "grad_norm": 0.6326273408496673, "learning_rate": 5.080587739927061e-06, "loss": 0.1437, "step": 1061 }, { "epoch": 0.9906716417910447, "grad_norm": 0.5986145968044964, "learning_rate": 5.073262132309801e-06, "loss": 0.1263, "step": 1062 }, { "epoch": 0.9916044776119403, "grad_norm": 0.5987409717318135, "learning_rate": 5.0659363673922e-06, "loss": 0.1466, "step": 1063 }, { "epoch": 0.9925373134328358, "grad_norm": 0.6587235233974444, "learning_rate": 5.058610460903332e-06, "loss": 0.1702, "step": 1064 }, { "epoch": 0.9934701492537313, "grad_norm": 0.6454557259605418, "learning_rate": 5.0512844285725715e-06, "loss": 0.1535, "step": 1065 }, { "epoch": 0.9944029850746269, "grad_norm": 0.6236742148996864, "learning_rate": 5.043958286129562e-06, "loss": 0.1516, "step": 1066 }, { "epoch": 0.9953358208955224, "grad_norm": 0.6455548139628794, "learning_rate": 5.036632049304189e-06, "loss": 0.1375, "step": 1067 }, { "epoch": 0.996268656716418, "grad_norm": 0.6151730812988808, "learning_rate": 5.029305733826533e-06, "loss": 0.148, "step": 1068 }, { "epoch": 0.9972014925373134, "grad_norm": 0.6205215585296048, "learning_rate": 5.021979355426851e-06, "loss": 0.1399, "step": 1069 }, { "epoch": 0.9981343283582089, "grad_norm": 0.6244910569093332, "learning_rate": 5.0146529298355305e-06, "loss": 0.1634, "step": 1070 }, { "epoch": 0.9990671641791045, "grad_norm": 0.5979550305375504, "learning_rate": 5.007326472783061e-06, "loss": 0.1391, "step": 1071 }, { "epoch": 1.0, "grad_norm": 0.5890678138719991, "learning_rate": 5e-06, "loss": 0.1128, "step": 1072 }, { "epoch": 1.0009328358208955, "grad_norm": 0.6077292858791679, "learning_rate": 4.992673527216939e-06, "loss": 0.101, "step": 1073 }, { "epoch": 1.001865671641791, "grad_norm": 0.6253314281531107, "learning_rate": 4.985347070164471e-06, "loss": 0.1124, "step": 1074 }, { "epoch": 1.0027985074626866, "grad_norm": 0.6448517263895885, "learning_rate": 4.97802064457315e-06, "loss": 0.1256, "step": 1075 }, { "epoch": 1.0037313432835822, "grad_norm": 0.6123999338914412, "learning_rate": 4.970694266173467e-06, "loss": 0.1273, "step": 1076 }, { "epoch": 1.0046641791044777, "grad_norm": 0.6072143627552756, "learning_rate": 4.963367950695814e-06, "loss": 0.1168, "step": 1077 }, { "epoch": 1.0055970149253732, "grad_norm": 0.6490374728824142, "learning_rate": 4.956041713870439e-06, "loss": 0.1401, "step": 1078 }, { "epoch": 1.0065298507462686, "grad_norm": 0.705368510157516, "learning_rate": 4.948715571427432e-06, "loss": 0.1329, "step": 1079 }, { "epoch": 1.007462686567164, "grad_norm": 0.6389605051926404, "learning_rate": 4.94138953909667e-06, "loss": 0.1169, "step": 1080 }, { "epoch": 1.0083955223880596, "grad_norm": 0.6928966408848014, "learning_rate": 4.934063632607802e-06, "loss": 0.1192, "step": 1081 }, { "epoch": 1.0093283582089552, "grad_norm": 0.6504578442129032, "learning_rate": 4.9267378676902014e-06, "loss": 0.1035, "step": 1082 }, { "epoch": 1.0102611940298507, "grad_norm": 0.7320308348172838, "learning_rate": 4.9194122600729396e-06, "loss": 0.1219, "step": 1083 }, { "epoch": 1.0111940298507462, "grad_norm": 0.718406825415337, "learning_rate": 4.9120868254847535e-06, "loss": 0.0999, "step": 1084 }, { "epoch": 1.0121268656716418, "grad_norm": 0.7268018830688721, "learning_rate": 4.9047615796540014e-06, "loss": 0.1295, "step": 1085 }, { "epoch": 1.0130597014925373, "grad_norm": 0.7411741935391241, "learning_rate": 4.897436538308641e-06, "loss": 0.1201, "step": 1086 }, { "epoch": 1.0139925373134329, "grad_norm": 0.699298312894494, "learning_rate": 4.890111717176193e-06, "loss": 0.1202, "step": 1087 }, { "epoch": 1.0149253731343284, "grad_norm": 0.7363599458233627, "learning_rate": 4.882787131983698e-06, "loss": 0.101, "step": 1088 }, { "epoch": 1.015858208955224, "grad_norm": 0.6810080971380015, "learning_rate": 4.875462798457698e-06, "loss": 0.1037, "step": 1089 }, { "epoch": 1.0167910447761195, "grad_norm": 0.6554692082630175, "learning_rate": 4.8681387323241895e-06, "loss": 0.0996, "step": 1090 }, { "epoch": 1.017723880597015, "grad_norm": 0.6500823890037924, "learning_rate": 4.860814949308595e-06, "loss": 0.0995, "step": 1091 }, { "epoch": 1.0186567164179103, "grad_norm": 0.67629523495468, "learning_rate": 4.853491465135733e-06, "loss": 0.1283, "step": 1092 }, { "epoch": 1.0195895522388059, "grad_norm": 0.6781761709131539, "learning_rate": 4.8461682955297795e-06, "loss": 0.1219, "step": 1093 }, { "epoch": 1.0205223880597014, "grad_norm": 0.6202474014248146, "learning_rate": 4.83884545621423e-06, "loss": 0.1053, "step": 1094 }, { "epoch": 1.021455223880597, "grad_norm": 0.6565091899123789, "learning_rate": 4.831522962911874e-06, "loss": 0.0938, "step": 1095 }, { "epoch": 1.0223880597014925, "grad_norm": 0.6876373910494644, "learning_rate": 4.82420083134476e-06, "loss": 0.1375, "step": 1096 }, { "epoch": 1.023320895522388, "grad_norm": 0.6853935945976264, "learning_rate": 4.8168790772341595e-06, "loss": 0.138, "step": 1097 }, { "epoch": 1.0242537313432836, "grad_norm": 0.723832294892521, "learning_rate": 4.80955771630053e-06, "loss": 0.1449, "step": 1098 }, { "epoch": 1.025186567164179, "grad_norm": 0.6606162069179766, "learning_rate": 4.8022367642634886e-06, "loss": 0.113, "step": 1099 }, { "epoch": 1.0261194029850746, "grad_norm": 0.626705757646172, "learning_rate": 4.794916236841773e-06, "loss": 0.0999, "step": 1100 }, { "epoch": 1.0270522388059702, "grad_norm": 0.6865803222400947, "learning_rate": 4.787596149753208e-06, "loss": 0.1064, "step": 1101 }, { "epoch": 1.0279850746268657, "grad_norm": 0.6813609382595731, "learning_rate": 4.780276518714675e-06, "loss": 0.1057, "step": 1102 }, { "epoch": 1.0289179104477613, "grad_norm": 0.6230047239348354, "learning_rate": 4.7729573594420765e-06, "loss": 0.0961, "step": 1103 }, { "epoch": 1.0298507462686568, "grad_norm": 0.5528180454497659, "learning_rate": 4.765638687650299e-06, "loss": 0.0694, "step": 1104 }, { "epoch": 1.0307835820895523, "grad_norm": 0.6316628738188933, "learning_rate": 4.758320519053186e-06, "loss": 0.096, "step": 1105 }, { "epoch": 1.0317164179104477, "grad_norm": 0.6304954107786697, "learning_rate": 4.7510028693635e-06, "loss": 0.0933, "step": 1106 }, { "epoch": 1.0326492537313432, "grad_norm": 0.7233065768231048, "learning_rate": 4.743685754292885e-06, "loss": 0.141, "step": 1107 }, { "epoch": 1.0335820895522387, "grad_norm": 0.6744448730964887, "learning_rate": 4.736369189551841e-06, "loss": 0.1274, "step": 1108 }, { "epoch": 1.0345149253731343, "grad_norm": 0.7685337114260238, "learning_rate": 4.729053190849686e-06, "loss": 0.1412, "step": 1109 }, { "epoch": 1.0354477611940298, "grad_norm": 0.6019167723488665, "learning_rate": 4.721737773894525e-06, "loss": 0.0876, "step": 1110 }, { "epoch": 1.0363805970149254, "grad_norm": 0.6337063116917955, "learning_rate": 4.714422954393208e-06, "loss": 0.1135, "step": 1111 }, { "epoch": 1.037313432835821, "grad_norm": 0.6663898994833303, "learning_rate": 4.7071087480513075e-06, "loss": 0.0989, "step": 1112 }, { "epoch": 1.0382462686567164, "grad_norm": 0.6885947424754459, "learning_rate": 4.699795170573078e-06, "loss": 0.1242, "step": 1113 }, { "epoch": 1.039179104477612, "grad_norm": 0.6326870925022272, "learning_rate": 4.692482237661421e-06, "loss": 0.0933, "step": 1114 }, { "epoch": 1.0401119402985075, "grad_norm": 0.6456319214132242, "learning_rate": 4.6851699650178595e-06, "loss": 0.1073, "step": 1115 }, { "epoch": 1.041044776119403, "grad_norm": 0.6731306300058962, "learning_rate": 4.677858368342495e-06, "loss": 0.1202, "step": 1116 }, { "epoch": 1.0419776119402986, "grad_norm": 0.6428995341159874, "learning_rate": 4.670547463333976e-06, "loss": 0.0923, "step": 1117 }, { "epoch": 1.0429104477611941, "grad_norm": 0.6393552968427241, "learning_rate": 4.66323726568947e-06, "loss": 0.0977, "step": 1118 }, { "epoch": 1.0438432835820897, "grad_norm": 0.6191140809098125, "learning_rate": 4.655927791104627e-06, "loss": 0.0915, "step": 1119 }, { "epoch": 1.044776119402985, "grad_norm": 0.6628895994328633, "learning_rate": 4.6486190552735375e-06, "loss": 0.1178, "step": 1120 }, { "epoch": 1.0457089552238805, "grad_norm": 0.6823659991240231, "learning_rate": 4.641311073888709e-06, "loss": 0.1178, "step": 1121 }, { "epoch": 1.046641791044776, "grad_norm": 0.6415413895316083, "learning_rate": 4.6340038626410335e-06, "loss": 0.1091, "step": 1122 }, { "epoch": 1.0475746268656716, "grad_norm": 0.6141158799592883, "learning_rate": 4.626697437219746e-06, "loss": 0.0811, "step": 1123 }, { "epoch": 1.0485074626865671, "grad_norm": 0.6224586086629333, "learning_rate": 4.619391813312391e-06, "loss": 0.1015, "step": 1124 }, { "epoch": 1.0494402985074627, "grad_norm": 0.6973140305339262, "learning_rate": 4.6120870066047976e-06, "loss": 0.1242, "step": 1125 }, { "epoch": 1.0503731343283582, "grad_norm": 0.6816900612903305, "learning_rate": 4.6047830327810396e-06, "loss": 0.1236, "step": 1126 }, { "epoch": 1.0513059701492538, "grad_norm": 0.6132188310138857, "learning_rate": 4.597479907523397e-06, "loss": 0.0938, "step": 1127 }, { "epoch": 1.0522388059701493, "grad_norm": 0.6766285304422819, "learning_rate": 4.590177646512335e-06, "loss": 0.102, "step": 1128 }, { "epoch": 1.0531716417910448, "grad_norm": 0.731085240466552, "learning_rate": 4.5828762654264595e-06, "loss": 0.1456, "step": 1129 }, { "epoch": 1.0541044776119404, "grad_norm": 0.7242539015279544, "learning_rate": 4.575575779942487e-06, "loss": 0.1414, "step": 1130 }, { "epoch": 1.055037313432836, "grad_norm": 0.6245524749788983, "learning_rate": 4.568276205735211e-06, "loss": 0.0945, "step": 1131 }, { "epoch": 1.0559701492537314, "grad_norm": 0.6127743616419338, "learning_rate": 4.560977558477471e-06, "loss": 0.0948, "step": 1132 }, { "epoch": 1.0569029850746268, "grad_norm": 0.6053128953462873, "learning_rate": 4.553679853840114e-06, "loss": 0.0869, "step": 1133 }, { "epoch": 1.0578358208955223, "grad_norm": 0.6611614496185652, "learning_rate": 4.546383107491963e-06, "loss": 0.1196, "step": 1134 }, { "epoch": 1.0587686567164178, "grad_norm": 0.6374771447163202, "learning_rate": 4.539087335099781e-06, "loss": 0.086, "step": 1135 }, { "epoch": 1.0597014925373134, "grad_norm": 0.6205615878819117, "learning_rate": 4.531792552328247e-06, "loss": 0.1183, "step": 1136 }, { "epoch": 1.060634328358209, "grad_norm": 0.6419116588556703, "learning_rate": 4.52449877483991e-06, "loss": 0.1184, "step": 1137 }, { "epoch": 1.0615671641791045, "grad_norm": 0.6445149001318629, "learning_rate": 4.51720601829516e-06, "loss": 0.1273, "step": 1138 }, { "epoch": 1.0625, "grad_norm": 0.7084759734189678, "learning_rate": 4.509914298352197e-06, "loss": 0.1474, "step": 1139 }, { "epoch": 1.0634328358208955, "grad_norm": 0.630272847164153, "learning_rate": 4.502623630666997e-06, "loss": 0.1088, "step": 1140 }, { "epoch": 1.064365671641791, "grad_norm": 0.6788567678620125, "learning_rate": 4.495334030893272e-06, "loss": 0.1171, "step": 1141 }, { "epoch": 1.0652985074626866, "grad_norm": 0.7279685908760443, "learning_rate": 4.488045514682444e-06, "loss": 0.137, "step": 1142 }, { "epoch": 1.0662313432835822, "grad_norm": 0.6737717037022103, "learning_rate": 4.480758097683608e-06, "loss": 0.1107, "step": 1143 }, { "epoch": 1.0671641791044777, "grad_norm": 0.7307523202068653, "learning_rate": 4.4734717955435e-06, "loss": 0.1192, "step": 1144 }, { "epoch": 1.0680970149253732, "grad_norm": 0.6436658950051306, "learning_rate": 4.466186623906462e-06, "loss": 0.0949, "step": 1145 }, { "epoch": 1.0690298507462686, "grad_norm": 0.7120738082816366, "learning_rate": 4.458902598414407e-06, "loss": 0.1166, "step": 1146 }, { "epoch": 1.069962686567164, "grad_norm": 0.6428908509322729, "learning_rate": 4.451619734706786e-06, "loss": 0.0878, "step": 1147 }, { "epoch": 1.0708955223880596, "grad_norm": 0.6806460918112655, "learning_rate": 4.44433804842056e-06, "loss": 0.1229, "step": 1148 }, { "epoch": 1.0718283582089552, "grad_norm": 0.7058301606057273, "learning_rate": 4.437057555190159e-06, "loss": 0.1267, "step": 1149 }, { "epoch": 1.0727611940298507, "grad_norm": 0.621331094406681, "learning_rate": 4.429778270647452e-06, "loss": 0.0954, "step": 1150 }, { "epoch": 1.0736940298507462, "grad_norm": 0.6910065767105272, "learning_rate": 4.422500210421713e-06, "loss": 0.1212, "step": 1151 }, { "epoch": 1.0746268656716418, "grad_norm": 0.6676827776372334, "learning_rate": 4.415223390139588e-06, "loss": 0.1077, "step": 1152 }, { "epoch": 1.0755597014925373, "grad_norm": 0.6820418619283747, "learning_rate": 4.40794782542506e-06, "loss": 0.1074, "step": 1153 }, { "epoch": 1.0764925373134329, "grad_norm": 0.7749533985219664, "learning_rate": 4.400673531899413e-06, "loss": 0.1373, "step": 1154 }, { "epoch": 1.0774253731343284, "grad_norm": 0.6877439612905237, "learning_rate": 4.393400525181208e-06, "loss": 0.1061, "step": 1155 }, { "epoch": 1.078358208955224, "grad_norm": 0.627711674173974, "learning_rate": 4.38612882088624e-06, "loss": 0.0949, "step": 1156 }, { "epoch": 1.0792910447761195, "grad_norm": 0.6397815815943224, "learning_rate": 4.378858434627504e-06, "loss": 0.0994, "step": 1157 }, { "epoch": 1.080223880597015, "grad_norm": 0.6738558875931577, "learning_rate": 4.371589382015171e-06, "loss": 0.1165, "step": 1158 }, { "epoch": 1.0811567164179103, "grad_norm": 0.5766893320514648, "learning_rate": 4.364321678656548e-06, "loss": 0.0734, "step": 1159 }, { "epoch": 1.0820895522388059, "grad_norm": 0.6842483214903616, "learning_rate": 4.357055340156041e-06, "loss": 0.1224, "step": 1160 }, { "epoch": 1.0830223880597014, "grad_norm": 0.6581819875147341, "learning_rate": 4.349790382115125e-06, "loss": 0.1089, "step": 1161 }, { "epoch": 1.083955223880597, "grad_norm": 0.6810169981620474, "learning_rate": 4.342526820132316e-06, "loss": 0.1242, "step": 1162 }, { "epoch": 1.0848880597014925, "grad_norm": 0.6593331662718219, "learning_rate": 4.335264669803131e-06, "loss": 0.1202, "step": 1163 }, { "epoch": 1.085820895522388, "grad_norm": 0.7074036088568522, "learning_rate": 4.328003946720053e-06, "loss": 0.1348, "step": 1164 }, { "epoch": 1.0867537313432836, "grad_norm": 0.6101481694933687, "learning_rate": 4.320744666472504e-06, "loss": 0.0952, "step": 1165 }, { "epoch": 1.087686567164179, "grad_norm": 0.6610071995845237, "learning_rate": 4.313486844646808e-06, "loss": 0.1126, "step": 1166 }, { "epoch": 1.0886194029850746, "grad_norm": 0.6674169469722171, "learning_rate": 4.3062304968261545e-06, "loss": 0.1095, "step": 1167 }, { "epoch": 1.0895522388059702, "grad_norm": 0.6358191326824404, "learning_rate": 4.2989756385905715e-06, "loss": 0.0907, "step": 1168 }, { "epoch": 1.0904850746268657, "grad_norm": 0.7298716106423773, "learning_rate": 4.291722285516887e-06, "loss": 0.1629, "step": 1169 }, { "epoch": 1.0914179104477613, "grad_norm": 0.6764701987356095, "learning_rate": 4.284470453178698e-06, "loss": 0.1265, "step": 1170 }, { "epoch": 1.0923507462686568, "grad_norm": 0.6521326257716065, "learning_rate": 4.277220157146335e-06, "loss": 0.1031, "step": 1171 }, { "epoch": 1.0932835820895523, "grad_norm": 0.6760085185105235, "learning_rate": 4.269971412986833e-06, "loss": 0.1105, "step": 1172 }, { "epoch": 1.0942164179104477, "grad_norm": 0.7261809326513253, "learning_rate": 4.262724236263892e-06, "loss": 0.1443, "step": 1173 }, { "epoch": 1.0951492537313432, "grad_norm": 0.6541970149403133, "learning_rate": 4.255478642537846e-06, "loss": 0.1016, "step": 1174 }, { "epoch": 1.0960820895522387, "grad_norm": 0.6302749676241577, "learning_rate": 4.248234647365632e-06, "loss": 0.1065, "step": 1175 }, { "epoch": 1.0970149253731343, "grad_norm": 0.6652021923834013, "learning_rate": 4.240992266300757e-06, "loss": 0.1228, "step": 1176 }, { "epoch": 1.0979477611940298, "grad_norm": 0.6836123963709407, "learning_rate": 4.233751514893257e-06, "loss": 0.1175, "step": 1177 }, { "epoch": 1.0988805970149254, "grad_norm": 0.6637605724820411, "learning_rate": 4.226512408689674e-06, "loss": 0.1048, "step": 1178 }, { "epoch": 1.099813432835821, "grad_norm": 0.6367663747470498, "learning_rate": 4.219274963233014e-06, "loss": 0.1108, "step": 1179 }, { "epoch": 1.1007462686567164, "grad_norm": 0.6666736102831348, "learning_rate": 4.212039194062718e-06, "loss": 0.1211, "step": 1180 }, { "epoch": 1.101679104477612, "grad_norm": 0.7147841896439374, "learning_rate": 4.20480511671463e-06, "loss": 0.1196, "step": 1181 }, { "epoch": 1.1026119402985075, "grad_norm": 0.7253957753133302, "learning_rate": 4.19757274672096e-06, "loss": 0.1529, "step": 1182 }, { "epoch": 1.103544776119403, "grad_norm": 0.6542465637953488, "learning_rate": 4.1903420996102515e-06, "loss": 0.1237, "step": 1183 }, { "epoch": 1.1044776119402986, "grad_norm": 0.642672339779944, "learning_rate": 4.183113190907349e-06, "loss": 0.1031, "step": 1184 }, { "epoch": 1.1054104477611941, "grad_norm": 0.662512017033947, "learning_rate": 4.175886036133366e-06, "loss": 0.1213, "step": 1185 }, { "epoch": 1.1063432835820897, "grad_norm": 0.6878497499425189, "learning_rate": 4.16866065080565e-06, "loss": 0.112, "step": 1186 }, { "epoch": 1.107276119402985, "grad_norm": 0.6429279596684814, "learning_rate": 4.161437050437746e-06, "loss": 0.1024, "step": 1187 }, { "epoch": 1.1082089552238805, "grad_norm": 0.642680225890338, "learning_rate": 4.1542152505393694e-06, "loss": 0.1032, "step": 1188 }, { "epoch": 1.109141791044776, "grad_norm": 0.7124569967888377, "learning_rate": 4.146995266616371e-06, "loss": 0.1359, "step": 1189 }, { "epoch": 1.1100746268656716, "grad_norm": 0.7101959046319726, "learning_rate": 4.1397771141706995e-06, "loss": 0.1235, "step": 1190 }, { "epoch": 1.1110074626865671, "grad_norm": 0.6933933548597923, "learning_rate": 4.132560808700374e-06, "loss": 0.111, "step": 1191 }, { "epoch": 1.1119402985074627, "grad_norm": 0.677855331872483, "learning_rate": 4.125346365699446e-06, "loss": 0.126, "step": 1192 }, { "epoch": 1.1128731343283582, "grad_norm": 0.6375643966302513, "learning_rate": 4.118133800657968e-06, "loss": 0.1002, "step": 1193 }, { "epoch": 1.1138059701492538, "grad_norm": 0.6972068356420358, "learning_rate": 4.110923129061961e-06, "loss": 0.1208, "step": 1194 }, { "epoch": 1.1147388059701493, "grad_norm": 0.6786054106128122, "learning_rate": 4.103714366393383e-06, "loss": 0.1062, "step": 1195 }, { "epoch": 1.1156716417910448, "grad_norm": 0.6268696545779459, "learning_rate": 4.09650752813009e-06, "loss": 0.092, "step": 1196 }, { "epoch": 1.1166044776119404, "grad_norm": 0.6153332021807034, "learning_rate": 4.089302629745806e-06, "loss": 0.0954, "step": 1197 }, { "epoch": 1.117537313432836, "grad_norm": 0.6414614450012807, "learning_rate": 4.082099686710093e-06, "loss": 0.1054, "step": 1198 }, { "epoch": 1.1184701492537314, "grad_norm": 0.6544762007882718, "learning_rate": 4.074898714488313e-06, "loss": 0.11, "step": 1199 }, { "epoch": 1.1194029850746268, "grad_norm": 0.6536450807860814, "learning_rate": 4.067699728541595e-06, "loss": 0.099, "step": 1200 }, { "epoch": 1.1203358208955223, "grad_norm": 0.6274108722848706, "learning_rate": 4.060502744326805e-06, "loss": 0.0877, "step": 1201 }, { "epoch": 1.1212686567164178, "grad_norm": 0.6471221060970316, "learning_rate": 4.053307777296511e-06, "loss": 0.098, "step": 1202 }, { "epoch": 1.1222014925373134, "grad_norm": 0.6954554545220338, "learning_rate": 4.046114842898948e-06, "loss": 0.1219, "step": 1203 }, { "epoch": 1.123134328358209, "grad_norm": 0.6783731574846326, "learning_rate": 4.03892395657799e-06, "loss": 0.1061, "step": 1204 }, { "epoch": 1.1240671641791045, "grad_norm": 0.6141120184729779, "learning_rate": 4.031735133773113e-06, "loss": 0.0978, "step": 1205 }, { "epoch": 1.125, "grad_norm": 0.6798461227160307, "learning_rate": 4.02454838991936e-06, "loss": 0.1202, "step": 1206 }, { "epoch": 1.1259328358208955, "grad_norm": 0.6660825774437203, "learning_rate": 4.0173637404473105e-06, "loss": 0.1143, "step": 1207 }, { "epoch": 1.126865671641791, "grad_norm": 0.6317663354809716, "learning_rate": 4.010181200783052e-06, "loss": 0.0965, "step": 1208 }, { "epoch": 1.1277985074626866, "grad_norm": 0.6817422481908203, "learning_rate": 4.003000786348135e-06, "loss": 0.1168, "step": 1209 }, { "epoch": 1.1287313432835822, "grad_norm": 0.6255185461311801, "learning_rate": 3.995822512559552e-06, "loss": 0.1077, "step": 1210 }, { "epoch": 1.1296641791044777, "grad_norm": 0.6058872719687594, "learning_rate": 3.988646394829699e-06, "loss": 0.0877, "step": 1211 }, { "epoch": 1.1305970149253732, "grad_norm": 0.6632440067617557, "learning_rate": 3.981472448566339e-06, "loss": 0.1083, "step": 1212 }, { "epoch": 1.1315298507462686, "grad_norm": 0.6485791047386802, "learning_rate": 3.974300689172579e-06, "loss": 0.1026, "step": 1213 }, { "epoch": 1.132462686567164, "grad_norm": 0.6705588232909454, "learning_rate": 3.967131132046822e-06, "loss": 0.1331, "step": 1214 }, { "epoch": 1.1333955223880596, "grad_norm": 0.7004650365449027, "learning_rate": 3.9599637925827495e-06, "loss": 0.1014, "step": 1215 }, { "epoch": 1.1343283582089552, "grad_norm": 0.7128630354108189, "learning_rate": 3.952798686169279e-06, "loss": 0.1295, "step": 1216 }, { "epoch": 1.1352611940298507, "grad_norm": 0.669303981062534, "learning_rate": 3.945635828190534e-06, "loss": 0.1173, "step": 1217 }, { "epoch": 1.1361940298507462, "grad_norm": 0.6198907036651744, "learning_rate": 3.938475234025812e-06, "loss": 0.099, "step": 1218 }, { "epoch": 1.1371268656716418, "grad_norm": 0.7027166044387859, "learning_rate": 3.931316919049544e-06, "loss": 0.1416, "step": 1219 }, { "epoch": 1.1380597014925373, "grad_norm": 0.6797563497337304, "learning_rate": 3.924160898631274e-06, "loss": 0.1291, "step": 1220 }, { "epoch": 1.1389925373134329, "grad_norm": 0.638516075552271, "learning_rate": 3.917007188135618e-06, "loss": 0.0947, "step": 1221 }, { "epoch": 1.1399253731343284, "grad_norm": 0.6798133720679237, "learning_rate": 3.9098558029222275e-06, "loss": 0.1288, "step": 1222 }, { "epoch": 1.140858208955224, "grad_norm": 0.7703289763668479, "learning_rate": 3.902706758345768e-06, "loss": 0.142, "step": 1223 }, { "epoch": 1.1417910447761195, "grad_norm": 0.6871488762055015, "learning_rate": 3.8955600697558764e-06, "loss": 0.1351, "step": 1224 }, { "epoch": 1.142723880597015, "grad_norm": 0.6902528839523053, "learning_rate": 3.88841575249713e-06, "loss": 0.1418, "step": 1225 }, { "epoch": 1.1436567164179103, "grad_norm": 0.629499954494216, "learning_rate": 3.8812738219090165e-06, "loss": 0.0995, "step": 1226 }, { "epoch": 1.1445895522388059, "grad_norm": 0.6636520751808409, "learning_rate": 3.874134293325901e-06, "loss": 0.1179, "step": 1227 }, { "epoch": 1.1455223880597014, "grad_norm": 0.7296116464323124, "learning_rate": 3.866997182076985e-06, "loss": 0.1236, "step": 1228 }, { "epoch": 1.146455223880597, "grad_norm": 0.7099698036640795, "learning_rate": 3.8598625034862834e-06, "loss": 0.1267, "step": 1229 }, { "epoch": 1.1473880597014925, "grad_norm": 0.6695707186816839, "learning_rate": 3.8527302728725906e-06, "loss": 0.1244, "step": 1230 }, { "epoch": 1.148320895522388, "grad_norm": 0.6439369629515325, "learning_rate": 3.845600505549443e-06, "loss": 0.1005, "step": 1231 }, { "epoch": 1.1492537313432836, "grad_norm": 0.6729066900360723, "learning_rate": 3.838473216825085e-06, "loss": 0.1045, "step": 1232 }, { "epoch": 1.150186567164179, "grad_norm": 0.629344123569514, "learning_rate": 3.8313484220024434e-06, "loss": 0.1002, "step": 1233 }, { "epoch": 1.1511194029850746, "grad_norm": 0.6822428732123457, "learning_rate": 3.82422613637909e-06, "loss": 0.1331, "step": 1234 }, { "epoch": 1.1520522388059702, "grad_norm": 0.699685380974735, "learning_rate": 3.817106375247205e-06, "loss": 0.1184, "step": 1235 }, { "epoch": 1.1529850746268657, "grad_norm": 0.656296758147819, "learning_rate": 3.809989153893554e-06, "loss": 0.1246, "step": 1236 }, { "epoch": 1.1539179104477613, "grad_norm": 0.6709199794149847, "learning_rate": 3.802874487599447e-06, "loss": 0.1073, "step": 1237 }, { "epoch": 1.1548507462686568, "grad_norm": 0.656860170354514, "learning_rate": 3.795762391640705e-06, "loss": 0.1237, "step": 1238 }, { "epoch": 1.1557835820895521, "grad_norm": 0.7112578771402778, "learning_rate": 3.788652881287635e-06, "loss": 0.1296, "step": 1239 }, { "epoch": 1.1567164179104479, "grad_norm": 0.6863672228721653, "learning_rate": 3.781545971804992e-06, "loss": 0.1282, "step": 1240 }, { "epoch": 1.1576492537313432, "grad_norm": 0.6318898901920856, "learning_rate": 3.774441678451943e-06, "loss": 0.1039, "step": 1241 }, { "epoch": 1.1585820895522387, "grad_norm": 0.6699208312190958, "learning_rate": 3.767340016482039e-06, "loss": 0.117, "step": 1242 }, { "epoch": 1.1595149253731343, "grad_norm": 0.6955068206849431, "learning_rate": 3.7602410011431837e-06, "loss": 0.129, "step": 1243 }, { "epoch": 1.1604477611940298, "grad_norm": 0.6500535257694892, "learning_rate": 3.753144647677599e-06, "loss": 0.1165, "step": 1244 }, { "epoch": 1.1613805970149254, "grad_norm": 0.622359244785789, "learning_rate": 3.7460509713217863e-06, "loss": 0.0958, "step": 1245 }, { "epoch": 1.162313432835821, "grad_norm": 0.7325277455372621, "learning_rate": 3.7389599873065034e-06, "loss": 0.1472, "step": 1246 }, { "epoch": 1.1632462686567164, "grad_norm": 0.7211165666943816, "learning_rate": 3.731871710856727e-06, "loss": 0.1517, "step": 1247 }, { "epoch": 1.164179104477612, "grad_norm": 0.6774804744201163, "learning_rate": 3.7247861571916183e-06, "loss": 0.1259, "step": 1248 }, { "epoch": 1.1651119402985075, "grad_norm": 0.6496174963831818, "learning_rate": 3.717703341524494e-06, "loss": 0.1017, "step": 1249 }, { "epoch": 1.166044776119403, "grad_norm": 0.6355218831883176, "learning_rate": 3.7106232790627926e-06, "loss": 0.1112, "step": 1250 }, { "epoch": 1.1669776119402986, "grad_norm": 0.6893185596127996, "learning_rate": 3.7035459850080392e-06, "loss": 0.1409, "step": 1251 }, { "epoch": 1.1679104477611941, "grad_norm": 0.6338274713305431, "learning_rate": 3.696471474555816e-06, "loss": 0.1051, "step": 1252 }, { "epoch": 1.1688432835820897, "grad_norm": 0.6493563297264361, "learning_rate": 3.6893997628957314e-06, "loss": 0.1081, "step": 1253 }, { "epoch": 1.169776119402985, "grad_norm": 0.6446878202362214, "learning_rate": 3.6823308652113783e-06, "loss": 0.0949, "step": 1254 }, { "epoch": 1.1707089552238805, "grad_norm": 0.6385965701292257, "learning_rate": 3.6752647966803114e-06, "loss": 0.1056, "step": 1255 }, { "epoch": 1.171641791044776, "grad_norm": 0.6466651425045786, "learning_rate": 3.6682015724740116e-06, "loss": 0.0996, "step": 1256 }, { "epoch": 1.1725746268656716, "grad_norm": 0.6966069902989482, "learning_rate": 3.661141207757854e-06, "loss": 0.1265, "step": 1257 }, { "epoch": 1.1735074626865671, "grad_norm": 0.6731902752409509, "learning_rate": 3.6540837176910688e-06, "loss": 0.1053, "step": 1258 }, { "epoch": 1.1744402985074627, "grad_norm": 0.6043150356727226, "learning_rate": 3.6470291174267187e-06, "loss": 0.1012, "step": 1259 }, { "epoch": 1.1753731343283582, "grad_norm": 0.6810674309994448, "learning_rate": 3.6399774221116613e-06, "loss": 0.1139, "step": 1260 }, { "epoch": 1.1763059701492538, "grad_norm": 0.6616697091473649, "learning_rate": 3.6329286468865143e-06, "loss": 0.1188, "step": 1261 }, { "epoch": 1.1772388059701493, "grad_norm": 0.679230472698551, "learning_rate": 3.625882806885629e-06, "loss": 0.1125, "step": 1262 }, { "epoch": 1.1781716417910448, "grad_norm": 0.6720289402751944, "learning_rate": 3.6188399172370526e-06, "loss": 0.1291, "step": 1263 }, { "epoch": 1.1791044776119404, "grad_norm": 0.6636654903565365, "learning_rate": 3.611799993062497e-06, "loss": 0.1133, "step": 1264 }, { "epoch": 1.180037313432836, "grad_norm": 0.6909023605182864, "learning_rate": 3.6047630494773093e-06, "loss": 0.1285, "step": 1265 }, { "epoch": 1.1809701492537314, "grad_norm": 0.6118165193632887, "learning_rate": 3.597729101590436e-06, "loss": 0.0903, "step": 1266 }, { "epoch": 1.1819029850746268, "grad_norm": 0.6594698879585971, "learning_rate": 3.590698164504391e-06, "loss": 0.1154, "step": 1267 }, { "epoch": 1.1828358208955223, "grad_norm": 0.6597906596330906, "learning_rate": 3.583670253315223e-06, "loss": 0.1062, "step": 1268 }, { "epoch": 1.1837686567164178, "grad_norm": 0.673849325482553, "learning_rate": 3.576645383112485e-06, "loss": 0.1148, "step": 1269 }, { "epoch": 1.1847014925373134, "grad_norm": 0.6649023363699672, "learning_rate": 3.5696235689792e-06, "loss": 0.1055, "step": 1270 }, { "epoch": 1.185634328358209, "grad_norm": 0.6498100761792511, "learning_rate": 3.5626048259918324e-06, "loss": 0.0949, "step": 1271 }, { "epoch": 1.1865671641791045, "grad_norm": 0.6945025770415555, "learning_rate": 3.5555891692202475e-06, "loss": 0.1175, "step": 1272 }, { "epoch": 1.1875, "grad_norm": 0.7269538898072373, "learning_rate": 3.5485766137276894e-06, "loss": 0.1378, "step": 1273 }, { "epoch": 1.1884328358208955, "grad_norm": 0.6700371167547269, "learning_rate": 3.5415671745707383e-06, "loss": 0.1089, "step": 1274 }, { "epoch": 1.189365671641791, "grad_norm": 0.6841775404519547, "learning_rate": 3.5345608667992863e-06, "loss": 0.1222, "step": 1275 }, { "epoch": 1.1902985074626866, "grad_norm": 0.5880283436306308, "learning_rate": 3.5275577054565047e-06, "loss": 0.0769, "step": 1276 }, { "epoch": 1.1912313432835822, "grad_norm": 0.5914514237812865, "learning_rate": 3.520557705578802e-06, "loss": 0.0857, "step": 1277 }, { "epoch": 1.1921641791044777, "grad_norm": 0.6794800744212194, "learning_rate": 3.5135608821958055e-06, "loss": 0.1228, "step": 1278 }, { "epoch": 1.1930970149253732, "grad_norm": 0.7213801610974544, "learning_rate": 3.5065672503303204e-06, "loss": 0.1105, "step": 1279 }, { "epoch": 1.1940298507462686, "grad_norm": 0.6763612814161982, "learning_rate": 3.4995768249982975e-06, "loss": 0.1053, "step": 1280 }, { "epoch": 1.194962686567164, "grad_norm": 0.6854490730715602, "learning_rate": 3.492589621208804e-06, "loss": 0.126, "step": 1281 }, { "epoch": 1.1958955223880596, "grad_norm": 0.6726123123827632, "learning_rate": 3.4856056539639906e-06, "loss": 0.1016, "step": 1282 }, { "epoch": 1.1968283582089552, "grad_norm": 0.605436355362301, "learning_rate": 3.4786249382590575e-06, "loss": 0.0799, "step": 1283 }, { "epoch": 1.1977611940298507, "grad_norm": 0.7385276002637254, "learning_rate": 3.471647489082227e-06, "loss": 0.1099, "step": 1284 }, { "epoch": 1.1986940298507462, "grad_norm": 0.6780229222911421, "learning_rate": 3.4646733214147037e-06, "loss": 0.1072, "step": 1285 }, { "epoch": 1.1996268656716418, "grad_norm": 0.6621134182911969, "learning_rate": 3.457702450230649e-06, "loss": 0.0964, "step": 1286 }, { "epoch": 1.2005597014925373, "grad_norm": 0.6496505303045654, "learning_rate": 3.450734890497146e-06, "loss": 0.0955, "step": 1287 }, { "epoch": 1.2014925373134329, "grad_norm": 0.6674572149964947, "learning_rate": 3.443770657174166e-06, "loss": 0.1096, "step": 1288 }, { "epoch": 1.2024253731343284, "grad_norm": 0.6331259512905991, "learning_rate": 3.4368097652145416e-06, "loss": 0.1004, "step": 1289 }, { "epoch": 1.203358208955224, "grad_norm": 0.6809895359891708, "learning_rate": 3.4298522295639298e-06, "loss": 0.1176, "step": 1290 }, { "epoch": 1.2042910447761195, "grad_norm": 0.5985978878230243, "learning_rate": 3.4228980651607787e-06, "loss": 0.0782, "step": 1291 }, { "epoch": 1.205223880597015, "grad_norm": 0.7033093827732854, "learning_rate": 3.415947286936301e-06, "loss": 0.1257, "step": 1292 }, { "epoch": 1.2061567164179103, "grad_norm": 0.6725941581863099, "learning_rate": 3.40899990981444e-06, "loss": 0.1054, "step": 1293 }, { "epoch": 1.2070895522388059, "grad_norm": 0.6419830935015188, "learning_rate": 3.4020559487118337e-06, "loss": 0.0943, "step": 1294 }, { "epoch": 1.2080223880597014, "grad_norm": 0.6800424185513414, "learning_rate": 3.3951154185377843e-06, "loss": 0.1061, "step": 1295 }, { "epoch": 1.208955223880597, "grad_norm": 0.6549219184994775, "learning_rate": 3.388178334194232e-06, "loss": 0.0948, "step": 1296 }, { "epoch": 1.2098880597014925, "grad_norm": 0.7110482110993865, "learning_rate": 3.381244710575717e-06, "loss": 0.1406, "step": 1297 }, { "epoch": 1.210820895522388, "grad_norm": 0.639888639310854, "learning_rate": 3.3743145625693456e-06, "loss": 0.1084, "step": 1298 }, { "epoch": 1.2117537313432836, "grad_norm": 0.6583730767819896, "learning_rate": 3.3673879050547664e-06, "loss": 0.102, "step": 1299 }, { "epoch": 1.212686567164179, "grad_norm": 0.7248030574293371, "learning_rate": 3.360464752904132e-06, "loss": 0.1441, "step": 1300 }, { "epoch": 1.2136194029850746, "grad_norm": 0.6997702846318571, "learning_rate": 3.353545120982067e-06, "loss": 0.1359, "step": 1301 }, { "epoch": 1.2145522388059702, "grad_norm": 0.6456093067908237, "learning_rate": 3.346629024145639e-06, "loss": 0.1025, "step": 1302 }, { "epoch": 1.2154850746268657, "grad_norm": 0.6745218497301696, "learning_rate": 3.3397164772443274e-06, "loss": 0.1206, "step": 1303 }, { "epoch": 1.2164179104477613, "grad_norm": 0.6729285663472476, "learning_rate": 3.3328074951199846e-06, "loss": 0.1065, "step": 1304 }, { "epoch": 1.2173507462686568, "grad_norm": 0.6558917200832336, "learning_rate": 3.325902092606814e-06, "loss": 0.0914, "step": 1305 }, { "epoch": 1.2182835820895521, "grad_norm": 0.6322406977452455, "learning_rate": 3.319000284531332e-06, "loss": 0.1057, "step": 1306 }, { "epoch": 1.2192164179104479, "grad_norm": 0.6787954425275543, "learning_rate": 3.3121020857123364e-06, "loss": 0.1234, "step": 1307 }, { "epoch": 1.2201492537313432, "grad_norm": 0.6783395264101006, "learning_rate": 3.3052075109608734e-06, "loss": 0.1293, "step": 1308 }, { "epoch": 1.2210820895522387, "grad_norm": 0.6395455471743214, "learning_rate": 3.2983165750802127e-06, "loss": 0.1008, "step": 1309 }, { "epoch": 1.2220149253731343, "grad_norm": 0.6599017210544008, "learning_rate": 3.29142929286581e-06, "loss": 0.1127, "step": 1310 }, { "epoch": 1.2229477611940298, "grad_norm": 0.6721996065352651, "learning_rate": 3.2845456791052733e-06, "loss": 0.1274, "step": 1311 }, { "epoch": 1.2238805970149254, "grad_norm": 0.66346384322633, "learning_rate": 3.2776657485783357e-06, "loss": 0.0974, "step": 1312 }, { "epoch": 1.224813432835821, "grad_norm": 0.6918456096249572, "learning_rate": 3.2707895160568255e-06, "loss": 0.1262, "step": 1313 }, { "epoch": 1.2257462686567164, "grad_norm": 0.6507879109084083, "learning_rate": 3.263916996304624e-06, "loss": 0.1021, "step": 1314 }, { "epoch": 1.226679104477612, "grad_norm": 0.6407489265695984, "learning_rate": 3.257048204077647e-06, "loss": 0.1014, "step": 1315 }, { "epoch": 1.2276119402985075, "grad_norm": 0.6568220158840882, "learning_rate": 3.2501831541238048e-06, "loss": 0.0978, "step": 1316 }, { "epoch": 1.228544776119403, "grad_norm": 0.6572225655923793, "learning_rate": 3.2433218611829713e-06, "loss": 0.1147, "step": 1317 }, { "epoch": 1.2294776119402986, "grad_norm": 0.7131738837343997, "learning_rate": 3.236464339986956e-06, "loss": 0.1177, "step": 1318 }, { "epoch": 1.2304104477611941, "grad_norm": 0.6839052224099113, "learning_rate": 3.22961060525947e-06, "loss": 0.1158, "step": 1319 }, { "epoch": 1.2313432835820897, "grad_norm": 0.6924167449541818, "learning_rate": 3.2227606717160944e-06, "loss": 0.1218, "step": 1320 }, { "epoch": 1.232276119402985, "grad_norm": 0.6485365260724294, "learning_rate": 3.2159145540642433e-06, "loss": 0.1027, "step": 1321 }, { "epoch": 1.2332089552238805, "grad_norm": 0.6455158510795432, "learning_rate": 3.2090722670031465e-06, "loss": 0.0947, "step": 1322 }, { "epoch": 1.234141791044776, "grad_norm": 0.6545772605036551, "learning_rate": 3.2022338252238062e-06, "loss": 0.0943, "step": 1323 }, { "epoch": 1.2350746268656716, "grad_norm": 0.626604917877608, "learning_rate": 3.1953992434089643e-06, "loss": 0.102, "step": 1324 }, { "epoch": 1.2360074626865671, "grad_norm": 0.6715756338521263, "learning_rate": 3.18856853623308e-06, "loss": 0.1097, "step": 1325 }, { "epoch": 1.2369402985074627, "grad_norm": 0.6643618562936457, "learning_rate": 3.1817417183622915e-06, "loss": 0.1252, "step": 1326 }, { "epoch": 1.2378731343283582, "grad_norm": 0.6852168367113268, "learning_rate": 3.1749188044543865e-06, "loss": 0.1259, "step": 1327 }, { "epoch": 1.2388059701492538, "grad_norm": 0.6097404607090176, "learning_rate": 3.168099809158769e-06, "loss": 0.081, "step": 1328 }, { "epoch": 1.2397388059701493, "grad_norm": 0.638321980426531, "learning_rate": 3.1612847471164335e-06, "loss": 0.1203, "step": 1329 }, { "epoch": 1.2406716417910448, "grad_norm": 0.6615634344194884, "learning_rate": 3.1544736329599248e-06, "loss": 0.0981, "step": 1330 }, { "epoch": 1.2416044776119404, "grad_norm": 0.6632722240312284, "learning_rate": 3.1476664813133118e-06, "loss": 0.1043, "step": 1331 }, { "epoch": 1.242537313432836, "grad_norm": 0.6972617116276777, "learning_rate": 3.140863306792161e-06, "loss": 0.123, "step": 1332 }, { "epoch": 1.2434701492537314, "grad_norm": 0.6511503770132678, "learning_rate": 3.1340641240034907e-06, "loss": 0.1014, "step": 1333 }, { "epoch": 1.2444029850746268, "grad_norm": 0.6495210114663997, "learning_rate": 3.1272689475457592e-06, "loss": 0.1179, "step": 1334 }, { "epoch": 1.2453358208955223, "grad_norm": 0.6346029738066005, "learning_rate": 3.1204777920088108e-06, "loss": 0.1082, "step": 1335 }, { "epoch": 1.2462686567164178, "grad_norm": 0.6379478107998581, "learning_rate": 3.113690671973867e-06, "loss": 0.0924, "step": 1336 }, { "epoch": 1.2472014925373134, "grad_norm": 0.672163836453481, "learning_rate": 3.1069076020134785e-06, "loss": 0.1047, "step": 1337 }, { "epoch": 1.248134328358209, "grad_norm": 0.6970655236084273, "learning_rate": 3.100128596691503e-06, "loss": 0.1337, "step": 1338 }, { "epoch": 1.2490671641791045, "grad_norm": 0.6048363722870265, "learning_rate": 3.093353670563071e-06, "loss": 0.0853, "step": 1339 }, { "epoch": 1.25, "grad_norm": 0.6316307168594887, "learning_rate": 3.0865828381745515e-06, "loss": 0.0963, "step": 1340 }, { "epoch": 1.2509328358208955, "grad_norm": 0.6529829052559427, "learning_rate": 3.0798161140635287e-06, "loss": 0.1108, "step": 1341 }, { "epoch": 1.251865671641791, "grad_norm": 0.6916764991761619, "learning_rate": 3.0730535127587626e-06, "loss": 0.1189, "step": 1342 }, { "epoch": 1.2527985074626866, "grad_norm": 0.6798155591950784, "learning_rate": 3.0662950487801614e-06, "loss": 0.1194, "step": 1343 }, { "epoch": 1.2537313432835822, "grad_norm": 0.6677314491896892, "learning_rate": 3.059540736638751e-06, "loss": 0.0951, "step": 1344 }, { "epoch": 1.2546641791044777, "grad_norm": 0.7300641017849979, "learning_rate": 3.052790590836644e-06, "loss": 0.1508, "step": 1345 }, { "epoch": 1.2555970149253732, "grad_norm": 0.6785107758579306, "learning_rate": 3.046044625867004e-06, "loss": 0.1407, "step": 1346 }, { "epoch": 1.2565298507462686, "grad_norm": 0.6879676592585647, "learning_rate": 3.0393028562140237e-06, "loss": 0.1251, "step": 1347 }, { "epoch": 1.2574626865671643, "grad_norm": 0.6257376997722093, "learning_rate": 3.0325652963528797e-06, "loss": 0.1215, "step": 1348 }, { "epoch": 1.2583955223880596, "grad_norm": 0.677035881240514, "learning_rate": 3.0258319607497175e-06, "loss": 0.1046, "step": 1349 }, { "epoch": 1.2593283582089552, "grad_norm": 0.6357996063079804, "learning_rate": 3.0191028638616095e-06, "loss": 0.1039, "step": 1350 }, { "epoch": 1.2602611940298507, "grad_norm": 0.6953761362615961, "learning_rate": 3.012378020136526e-06, "loss": 0.1277, "step": 1351 }, { "epoch": 1.2611940298507462, "grad_norm": 0.6478439417861661, "learning_rate": 3.0056574440133104e-06, "loss": 0.1094, "step": 1352 }, { "epoch": 1.2621268656716418, "grad_norm": 0.6985811700612619, "learning_rate": 2.9989411499216357e-06, "loss": 0.1244, "step": 1353 }, { "epoch": 1.2630597014925373, "grad_norm": 0.6982088271959584, "learning_rate": 2.992229152281987e-06, "loss": 0.1463, "step": 1354 }, { "epoch": 1.2639925373134329, "grad_norm": 0.7384828865348646, "learning_rate": 2.9855214655056243e-06, "loss": 0.1483, "step": 1355 }, { "epoch": 1.2649253731343284, "grad_norm": 0.7034165115040941, "learning_rate": 2.978818103994546e-06, "loss": 0.1221, "step": 1356 }, { "epoch": 1.265858208955224, "grad_norm": 0.6809070664758655, "learning_rate": 2.9721190821414713e-06, "loss": 0.1195, "step": 1357 }, { "epoch": 1.2667910447761195, "grad_norm": 0.6278586710957271, "learning_rate": 2.9654244143297972e-06, "loss": 0.0936, "step": 1358 }, { "epoch": 1.267723880597015, "grad_norm": 0.7011233449543324, "learning_rate": 2.9587341149335726e-06, "loss": 0.1362, "step": 1359 }, { "epoch": 1.2686567164179103, "grad_norm": 0.6127515580785652, "learning_rate": 2.9520481983174675e-06, "loss": 0.0963, "step": 1360 }, { "epoch": 1.269589552238806, "grad_norm": 0.6917089414307945, "learning_rate": 2.945366678836745e-06, "loss": 0.1312, "step": 1361 }, { "epoch": 1.2705223880597014, "grad_norm": 0.6421147608211035, "learning_rate": 2.9386895708372205e-06, "loss": 0.094, "step": 1362 }, { "epoch": 1.271455223880597, "grad_norm": 0.6590776799565228, "learning_rate": 2.932016888655241e-06, "loss": 0.111, "step": 1363 }, { "epoch": 1.2723880597014925, "grad_norm": 0.7362906391219698, "learning_rate": 2.9253486466176516e-06, "loss": 0.1443, "step": 1364 }, { "epoch": 1.273320895522388, "grad_norm": 0.6717848928752341, "learning_rate": 2.9186848590417654e-06, "loss": 0.1289, "step": 1365 }, { "epoch": 1.2742537313432836, "grad_norm": 0.6623906468425126, "learning_rate": 2.912025540235327e-06, "loss": 0.1019, "step": 1366 }, { "epoch": 1.275186567164179, "grad_norm": 0.7081086408163834, "learning_rate": 2.9053707044964886e-06, "loss": 0.1224, "step": 1367 }, { "epoch": 1.2761194029850746, "grad_norm": 0.6899619505862242, "learning_rate": 2.8987203661137776e-06, "loss": 0.119, "step": 1368 }, { "epoch": 1.2770522388059702, "grad_norm": 0.6930521875939615, "learning_rate": 2.8920745393660642e-06, "loss": 0.1341, "step": 1369 }, { "epoch": 1.2779850746268657, "grad_norm": 0.5935560423379292, "learning_rate": 2.885433238522534e-06, "loss": 0.0884, "step": 1370 }, { "epoch": 1.2789179104477613, "grad_norm": 0.6804550342928365, "learning_rate": 2.878796477842648e-06, "loss": 0.1205, "step": 1371 }, { "epoch": 1.2798507462686568, "grad_norm": 0.6465520549051847, "learning_rate": 2.8721642715761267e-06, "loss": 0.0925, "step": 1372 }, { "epoch": 1.2807835820895521, "grad_norm": 0.6759109027662024, "learning_rate": 2.8655366339629093e-06, "loss": 0.1107, "step": 1373 }, { "epoch": 1.2817164179104479, "grad_norm": 0.6390777869050508, "learning_rate": 2.858913579233127e-06, "loss": 0.0999, "step": 1374 }, { "epoch": 1.2826492537313432, "grad_norm": 0.6556734624899093, "learning_rate": 2.852295121607066e-06, "loss": 0.1146, "step": 1375 }, { "epoch": 1.2835820895522387, "grad_norm": 0.6847107683384561, "learning_rate": 2.8456812752951483e-06, "loss": 0.1232, "step": 1376 }, { "epoch": 1.2845149253731343, "grad_norm": 0.6203561796931452, "learning_rate": 2.8390720544978933e-06, "loss": 0.0935, "step": 1377 }, { "epoch": 1.2854477611940298, "grad_norm": 0.6127513659268049, "learning_rate": 2.8324674734058855e-06, "loss": 0.0976, "step": 1378 }, { "epoch": 1.2863805970149254, "grad_norm": 0.6322072358012825, "learning_rate": 2.8258675461997513e-06, "loss": 0.0945, "step": 1379 }, { "epoch": 1.287313432835821, "grad_norm": 0.6134942889587948, "learning_rate": 2.8192722870501242e-06, "loss": 0.1007, "step": 1380 }, { "epoch": 1.2882462686567164, "grad_norm": 0.6314483292278474, "learning_rate": 2.812681710117614e-06, "loss": 0.0958, "step": 1381 }, { "epoch": 1.289179104477612, "grad_norm": 0.7083802359263354, "learning_rate": 2.8060958295527785e-06, "loss": 0.1301, "step": 1382 }, { "epoch": 1.2901119402985075, "grad_norm": 0.691251753484189, "learning_rate": 2.799514659496092e-06, "loss": 0.1017, "step": 1383 }, { "epoch": 1.291044776119403, "grad_norm": 0.633503488674255, "learning_rate": 2.792938214077912e-06, "loss": 0.0959, "step": 1384 }, { "epoch": 1.2919776119402986, "grad_norm": 0.6733955134345084, "learning_rate": 2.7863665074184553e-06, "loss": 0.1206, "step": 1385 }, { "epoch": 1.292910447761194, "grad_norm": 0.6368095176903747, "learning_rate": 2.7797995536277624e-06, "loss": 0.1043, "step": 1386 }, { "epoch": 1.2938432835820897, "grad_norm": 0.6529819993111272, "learning_rate": 2.773237366805672e-06, "loss": 0.1016, "step": 1387 }, { "epoch": 1.294776119402985, "grad_norm": 0.6840791052284649, "learning_rate": 2.766679961041781e-06, "loss": 0.1247, "step": 1388 }, { "epoch": 1.2957089552238805, "grad_norm": 0.7111204845968008, "learning_rate": 2.760127350415427e-06, "loss": 0.1095, "step": 1389 }, { "epoch": 1.296641791044776, "grad_norm": 0.7203503684040325, "learning_rate": 2.753579548995652e-06, "loss": 0.1079, "step": 1390 }, { "epoch": 1.2975746268656716, "grad_norm": 0.6064266006667792, "learning_rate": 2.7470365708411673e-06, "loss": 0.0898, "step": 1391 }, { "epoch": 1.2985074626865671, "grad_norm": 0.720402609311349, "learning_rate": 2.740498430000332e-06, "loss": 0.1258, "step": 1392 }, { "epoch": 1.2994402985074627, "grad_norm": 0.6423927645257799, "learning_rate": 2.7339651405111176e-06, "loss": 0.1017, "step": 1393 }, { "epoch": 1.3003731343283582, "grad_norm": 0.659347946788785, "learning_rate": 2.727436716401083e-06, "loss": 0.1189, "step": 1394 }, { "epoch": 1.3013059701492538, "grad_norm": 0.7038473500250781, "learning_rate": 2.7209131716873347e-06, "loss": 0.1311, "step": 1395 }, { "epoch": 1.3022388059701493, "grad_norm": 0.6222350610692811, "learning_rate": 2.714394520376509e-06, "loss": 0.0958, "step": 1396 }, { "epoch": 1.3031716417910448, "grad_norm": 0.6489664952544448, "learning_rate": 2.7078807764647277e-06, "loss": 0.1046, "step": 1397 }, { "epoch": 1.3041044776119404, "grad_norm": 0.6848563036659322, "learning_rate": 2.701371953937583e-06, "loss": 0.1338, "step": 1398 }, { "epoch": 1.3050373134328357, "grad_norm": 0.6571218650156727, "learning_rate": 2.694868066770099e-06, "loss": 0.1101, "step": 1399 }, { "epoch": 1.3059701492537314, "grad_norm": 0.7033558354390776, "learning_rate": 2.6883691289267e-06, "loss": 0.1047, "step": 1400 }, { "epoch": 1.3069029850746268, "grad_norm": 0.5970156273967477, "learning_rate": 2.6818751543611892e-06, "loss": 0.0961, "step": 1401 }, { "epoch": 1.3078358208955223, "grad_norm": 0.6238288216188824, "learning_rate": 2.675386157016706e-06, "loss": 0.1, "step": 1402 }, { "epoch": 1.3087686567164178, "grad_norm": 0.6621798977304396, "learning_rate": 2.6689021508257105e-06, "loss": 0.1342, "step": 1403 }, { "epoch": 1.3097014925373134, "grad_norm": 0.6731449712319134, "learning_rate": 2.6624231497099395e-06, "loss": 0.1132, "step": 1404 }, { "epoch": 1.310634328358209, "grad_norm": 0.6775807829050284, "learning_rate": 2.6559491675803883e-06, "loss": 0.127, "step": 1405 }, { "epoch": 1.3115671641791045, "grad_norm": 0.6305561573799964, "learning_rate": 2.649480218337276e-06, "loss": 0.0876, "step": 1406 }, { "epoch": 1.3125, "grad_norm": 0.6098914861260695, "learning_rate": 2.6430163158700116e-06, "loss": 0.0898, "step": 1407 }, { "epoch": 1.3134328358208955, "grad_norm": 0.6934436256178743, "learning_rate": 2.636557474057173e-06, "loss": 0.1177, "step": 1408 }, { "epoch": 1.314365671641791, "grad_norm": 0.7216643183765422, "learning_rate": 2.6301037067664726e-06, "loss": 0.1549, "step": 1409 }, { "epoch": 1.3152985074626866, "grad_norm": 0.6503183318948267, "learning_rate": 2.623655027854719e-06, "loss": 0.1039, "step": 1410 }, { "epoch": 1.3162313432835822, "grad_norm": 0.6601586252619949, "learning_rate": 2.6172114511678047e-06, "loss": 0.1135, "step": 1411 }, { "epoch": 1.3171641791044777, "grad_norm": 0.6468739356776672, "learning_rate": 2.6107729905406655e-06, "loss": 0.1201, "step": 1412 }, { "epoch": 1.3180970149253732, "grad_norm": 0.6553204482360568, "learning_rate": 2.6043396597972488e-06, "loss": 0.1048, "step": 1413 }, { "epoch": 1.3190298507462686, "grad_norm": 0.6609350764372386, "learning_rate": 2.597911472750494e-06, "loss": 0.1152, "step": 1414 }, { "epoch": 1.3199626865671643, "grad_norm": 0.722078121959011, "learning_rate": 2.5914884432022873e-06, "loss": 0.1214, "step": 1415 }, { "epoch": 1.3208955223880596, "grad_norm": 0.6479630094449343, "learning_rate": 2.585070584943452e-06, "loss": 0.1044, "step": 1416 }, { "epoch": 1.3218283582089552, "grad_norm": 0.6604713996836123, "learning_rate": 2.5786579117536983e-06, "loss": 0.1056, "step": 1417 }, { "epoch": 1.3227611940298507, "grad_norm": 0.6669742976104424, "learning_rate": 2.5722504374016093e-06, "loss": 0.1075, "step": 1418 }, { "epoch": 1.3236940298507462, "grad_norm": 0.6455954425748324, "learning_rate": 2.5658481756446056e-06, "loss": 0.1108, "step": 1419 }, { "epoch": 1.3246268656716418, "grad_norm": 0.6985456987016364, "learning_rate": 2.5594511402289145e-06, "loss": 0.1207, "step": 1420 }, { "epoch": 1.3255597014925373, "grad_norm": 0.6552073085579013, "learning_rate": 2.553059344889543e-06, "loss": 0.1027, "step": 1421 }, { "epoch": 1.3264925373134329, "grad_norm": 0.6765985823026404, "learning_rate": 2.546672803350247e-06, "loss": 0.0987, "step": 1422 }, { "epoch": 1.3274253731343284, "grad_norm": 0.692088563341024, "learning_rate": 2.5402915293234985e-06, "loss": 0.1178, "step": 1423 }, { "epoch": 1.328358208955224, "grad_norm": 0.6360395116571522, "learning_rate": 2.533915536510464e-06, "loss": 0.0882, "step": 1424 }, { "epoch": 1.3292910447761195, "grad_norm": 0.664962246620318, "learning_rate": 2.527544838600969e-06, "loss": 0.1106, "step": 1425 }, { "epoch": 1.330223880597015, "grad_norm": 0.6409906103181033, "learning_rate": 2.521179449273472e-06, "loss": 0.1082, "step": 1426 }, { "epoch": 1.3311567164179103, "grad_norm": 0.6896794871644634, "learning_rate": 2.5148193821950317e-06, "loss": 0.121, "step": 1427 }, { "epoch": 1.332089552238806, "grad_norm": 0.6489873107436956, "learning_rate": 2.5084646510212817e-06, "loss": 0.1087, "step": 1428 }, { "epoch": 1.3330223880597014, "grad_norm": 0.7321447063045473, "learning_rate": 2.5021152693963957e-06, "loss": 0.1063, "step": 1429 }, { "epoch": 1.333955223880597, "grad_norm": 0.6584993914212683, "learning_rate": 2.495771250953061e-06, "loss": 0.0969, "step": 1430 }, { "epoch": 1.3348880597014925, "grad_norm": 0.6207360322373635, "learning_rate": 2.4894326093124534e-06, "loss": 0.1156, "step": 1431 }, { "epoch": 1.335820895522388, "grad_norm": 0.6781529375285159, "learning_rate": 2.4830993580842023e-06, "loss": 0.1077, "step": 1432 }, { "epoch": 1.3367537313432836, "grad_norm": 0.6564495207821696, "learning_rate": 2.476771510866364e-06, "loss": 0.1048, "step": 1433 }, { "epoch": 1.337686567164179, "grad_norm": 0.6709787744188154, "learning_rate": 2.4704490812453907e-06, "loss": 0.1214, "step": 1434 }, { "epoch": 1.3386194029850746, "grad_norm": 0.6500605803055189, "learning_rate": 2.4641320827961063e-06, "loss": 0.1139, "step": 1435 }, { "epoch": 1.3395522388059702, "grad_norm": 0.6810345167191801, "learning_rate": 2.457820529081666e-06, "loss": 0.1386, "step": 1436 }, { "epoch": 1.3404850746268657, "grad_norm": 0.6625674754064363, "learning_rate": 2.4515144336535413e-06, "loss": 0.1129, "step": 1437 }, { "epoch": 1.3414179104477613, "grad_norm": 0.6849772114386645, "learning_rate": 2.445213810051482e-06, "loss": 0.1293, "step": 1438 }, { "epoch": 1.3423507462686568, "grad_norm": 0.672833332903975, "learning_rate": 2.43891867180349e-06, "loss": 0.1059, "step": 1439 }, { "epoch": 1.3432835820895521, "grad_norm": 0.646035557415159, "learning_rate": 2.4326290324257896e-06, "loss": 0.1059, "step": 1440 }, { "epoch": 1.3442164179104479, "grad_norm": 0.5919311303442432, "learning_rate": 2.4263449054227983e-06, "loss": 0.0759, "step": 1441 }, { "epoch": 1.3451492537313432, "grad_norm": 0.6807505860874371, "learning_rate": 2.4200663042870977e-06, "loss": 0.1328, "step": 1442 }, { "epoch": 1.3460820895522387, "grad_norm": 0.6675386644212102, "learning_rate": 2.413793242499402e-06, "loss": 0.1144, "step": 1443 }, { "epoch": 1.3470149253731343, "grad_norm": 0.6984999856091285, "learning_rate": 2.407525733528538e-06, "loss": 0.1275, "step": 1444 }, { "epoch": 1.3479477611940298, "grad_norm": 0.69069151076732, "learning_rate": 2.4012637908314064e-06, "loss": 0.1075, "step": 1445 }, { "epoch": 1.3488805970149254, "grad_norm": 0.681389535053473, "learning_rate": 2.3950074278529566e-06, "loss": 0.1127, "step": 1446 }, { "epoch": 1.349813432835821, "grad_norm": 0.616690642379758, "learning_rate": 2.38875665802616e-06, "loss": 0.1074, "step": 1447 }, { "epoch": 1.3507462686567164, "grad_norm": 0.6579336728387122, "learning_rate": 2.382511494771979e-06, "loss": 0.112, "step": 1448 }, { "epoch": 1.351679104477612, "grad_norm": 0.6247317394949042, "learning_rate": 2.3762719514993327e-06, "loss": 0.0964, "step": 1449 }, { "epoch": 1.3526119402985075, "grad_norm": 0.6449260530866021, "learning_rate": 2.370038041605079e-06, "loss": 0.1177, "step": 1450 }, { "epoch": 1.353544776119403, "grad_norm": 0.644298575496593, "learning_rate": 2.36380977847398e-06, "loss": 0.0848, "step": 1451 }, { "epoch": 1.3544776119402986, "grad_norm": 0.7072930089586612, "learning_rate": 2.357587175478672e-06, "loss": 0.1204, "step": 1452 }, { "epoch": 1.355410447761194, "grad_norm": 0.6106995115345111, "learning_rate": 2.3513702459796406e-06, "loss": 0.0874, "step": 1453 }, { "epoch": 1.3563432835820897, "grad_norm": 0.6294593779037693, "learning_rate": 2.3451590033251887e-06, "loss": 0.097, "step": 1454 }, { "epoch": 1.357276119402985, "grad_norm": 0.6387010921469651, "learning_rate": 2.338953460851408e-06, "loss": 0.0953, "step": 1455 }, { "epoch": 1.3582089552238805, "grad_norm": 0.7224810781900176, "learning_rate": 2.3327536318821496e-06, "loss": 0.111, "step": 1456 }, { "epoch": 1.359141791044776, "grad_norm": 0.6185382041120931, "learning_rate": 2.3265595297290035e-06, "loss": 0.0929, "step": 1457 }, { "epoch": 1.3600746268656716, "grad_norm": 0.6260947075484905, "learning_rate": 2.320371167691258e-06, "loss": 0.0851, "step": 1458 }, { "epoch": 1.3610074626865671, "grad_norm": 0.666563354848548, "learning_rate": 2.31418855905588e-06, "loss": 0.1148, "step": 1459 }, { "epoch": 1.3619402985074627, "grad_norm": 0.7274762179038102, "learning_rate": 2.3080117170974827e-06, "loss": 0.1266, "step": 1460 }, { "epoch": 1.3628731343283582, "grad_norm": 0.6358771139070013, "learning_rate": 2.301840655078298e-06, "loss": 0.097, "step": 1461 }, { "epoch": 1.3638059701492538, "grad_norm": 0.6987405459199605, "learning_rate": 2.2956753862481444e-06, "loss": 0.1236, "step": 1462 }, { "epoch": 1.3647388059701493, "grad_norm": 0.6905770373113316, "learning_rate": 2.289515923844406e-06, "loss": 0.1129, "step": 1463 }, { "epoch": 1.3656716417910448, "grad_norm": 0.6401166936743443, "learning_rate": 2.2833622810919987e-06, "loss": 0.1179, "step": 1464 }, { "epoch": 1.3666044776119404, "grad_norm": 0.5914216764294489, "learning_rate": 2.277214471203342e-06, "loss": 0.0861, "step": 1465 }, { "epoch": 1.3675373134328357, "grad_norm": 0.682138902033341, "learning_rate": 2.2710725073783345e-06, "loss": 0.1275, "step": 1466 }, { "epoch": 1.3684701492537314, "grad_norm": 0.627434694285632, "learning_rate": 2.264936402804322e-06, "loss": 0.0945, "step": 1467 }, { "epoch": 1.3694029850746268, "grad_norm": 0.6346085505903616, "learning_rate": 2.2588061706560643e-06, "loss": 0.1014, "step": 1468 }, { "epoch": 1.3703358208955223, "grad_norm": 0.6131821902172171, "learning_rate": 2.2526818240957217e-06, "loss": 0.0931, "step": 1469 }, { "epoch": 1.3712686567164178, "grad_norm": 0.654756335933124, "learning_rate": 2.2465633762728093e-06, "loss": 0.0975, "step": 1470 }, { "epoch": 1.3722014925373134, "grad_norm": 0.6450571316681467, "learning_rate": 2.240450840324183e-06, "loss": 0.1064, "step": 1471 }, { "epoch": 1.373134328358209, "grad_norm": 0.6301482918205608, "learning_rate": 2.234344229374003e-06, "loss": 0.0906, "step": 1472 }, { "epoch": 1.3740671641791045, "grad_norm": 0.666363734458774, "learning_rate": 2.2282435565337084e-06, "loss": 0.1059, "step": 1473 }, { "epoch": 1.375, "grad_norm": 0.6482332574912063, "learning_rate": 2.2221488349019903e-06, "loss": 0.1079, "step": 1474 }, { "epoch": 1.3759328358208955, "grad_norm": 0.6707138701479982, "learning_rate": 2.216060077564757e-06, "loss": 0.121, "step": 1475 }, { "epoch": 1.376865671641791, "grad_norm": 0.6297441684593365, "learning_rate": 2.2099772975951145e-06, "loss": 0.0939, "step": 1476 }, { "epoch": 1.3777985074626866, "grad_norm": 0.6798192576148419, "learning_rate": 2.203900508053336e-06, "loss": 0.1227, "step": 1477 }, { "epoch": 1.3787313432835822, "grad_norm": 0.6285364918767367, "learning_rate": 2.1978297219868307e-06, "loss": 0.1096, "step": 1478 }, { "epoch": 1.3796641791044777, "grad_norm": 0.7194398043158851, "learning_rate": 2.191764952430119e-06, "loss": 0.1287, "step": 1479 }, { "epoch": 1.3805970149253732, "grad_norm": 0.7467921594377885, "learning_rate": 2.1857062124048036e-06, "loss": 0.1132, "step": 1480 }, { "epoch": 1.3815298507462686, "grad_norm": 0.6711519170469675, "learning_rate": 2.1796535149195362e-06, "loss": 0.1115, "step": 1481 }, { "epoch": 1.3824626865671643, "grad_norm": 0.705472430852045, "learning_rate": 2.1736068729700045e-06, "loss": 0.133, "step": 1482 }, { "epoch": 1.3833955223880596, "grad_norm": 0.5884920058042795, "learning_rate": 2.167566299538883e-06, "loss": 0.0961, "step": 1483 }, { "epoch": 1.3843283582089552, "grad_norm": 0.6598704201107801, "learning_rate": 2.161531807595825e-06, "loss": 0.0995, "step": 1484 }, { "epoch": 1.3852611940298507, "grad_norm": 0.6855317542296866, "learning_rate": 2.155503410097423e-06, "loss": 0.1287, "step": 1485 }, { "epoch": 1.3861940298507462, "grad_norm": 0.6378494565953243, "learning_rate": 2.1494811199871857e-06, "loss": 0.0997, "step": 1486 }, { "epoch": 1.3871268656716418, "grad_norm": 0.692328905216879, "learning_rate": 2.1434649501955062e-06, "loss": 0.1302, "step": 1487 }, { "epoch": 1.3880597014925373, "grad_norm": 0.6916484060483346, "learning_rate": 2.1374549136396417e-06, "loss": 0.1067, "step": 1488 }, { "epoch": 1.3889925373134329, "grad_norm": 0.7002159910436213, "learning_rate": 2.1314510232236723e-06, "loss": 0.1212, "step": 1489 }, { "epoch": 1.3899253731343284, "grad_norm": 0.6667951613333883, "learning_rate": 2.1254532918384892e-06, "loss": 0.1112, "step": 1490 }, { "epoch": 1.390858208955224, "grad_norm": 0.7442374084633578, "learning_rate": 2.119461732361757e-06, "loss": 0.1439, "step": 1491 }, { "epoch": 1.3917910447761195, "grad_norm": 0.6860406674265671, "learning_rate": 2.113476357657889e-06, "loss": 0.12, "step": 1492 }, { "epoch": 1.392723880597015, "grad_norm": 0.6608607804110704, "learning_rate": 2.1074971805780196e-06, "loss": 0.0899, "step": 1493 }, { "epoch": 1.3936567164179103, "grad_norm": 0.6899596644749434, "learning_rate": 2.1015242139599773e-06, "loss": 0.1259, "step": 1494 }, { "epoch": 1.394589552238806, "grad_norm": 0.6893189770457337, "learning_rate": 2.095557470628253e-06, "loss": 0.1003, "step": 1495 }, { "epoch": 1.3955223880597014, "grad_norm": 0.6644774691316409, "learning_rate": 2.089596963393975e-06, "loss": 0.0973, "step": 1496 }, { "epoch": 1.396455223880597, "grad_norm": 0.6373860890671447, "learning_rate": 2.0836427050548874e-06, "loss": 0.1015, "step": 1497 }, { "epoch": 1.3973880597014925, "grad_norm": 0.6968098941132433, "learning_rate": 2.0776947083953136e-06, "loss": 0.1114, "step": 1498 }, { "epoch": 1.398320895522388, "grad_norm": 0.6751205084497587, "learning_rate": 2.071752986186134e-06, "loss": 0.1201, "step": 1499 }, { "epoch": 1.3992537313432836, "grad_norm": 0.6956471917149037, "learning_rate": 2.0658175511847565e-06, "loss": 0.137, "step": 1500 }, { "epoch": 1.3992537313432836, "eval_loss": 0.1641411930322647, "eval_runtime": 4.2021, "eval_samples_per_second": 20.704, "eval_steps_per_second": 5.236, "step": 1500 }, { "epoch": 1.400186567164179, "grad_norm": 0.6181596710522436, "learning_rate": 2.0598884161350923e-06, "loss": 0.0821, "step": 1501 }, { "epoch": 1.4011194029850746, "grad_norm": 0.6865458806143694, "learning_rate": 2.05396559376752e-06, "loss": 0.1308, "step": 1502 }, { "epoch": 1.4020522388059702, "grad_norm": 0.6708467118552782, "learning_rate": 2.0480490967988693e-06, "loss": 0.1067, "step": 1503 }, { "epoch": 1.4029850746268657, "grad_norm": 0.6534218033704017, "learning_rate": 2.042138937932388e-06, "loss": 0.1118, "step": 1504 }, { "epoch": 1.4039179104477613, "grad_norm": 0.6606892802816939, "learning_rate": 2.036235129857715e-06, "loss": 0.125, "step": 1505 }, { "epoch": 1.4048507462686568, "grad_norm": 0.6599236392188506, "learning_rate": 2.0303376852508527e-06, "loss": 0.1082, "step": 1506 }, { "epoch": 1.4057835820895521, "grad_norm": 0.6362343523107495, "learning_rate": 2.0244466167741434e-06, "loss": 0.0972, "step": 1507 }, { "epoch": 1.4067164179104479, "grad_norm": 0.7052366254225951, "learning_rate": 2.018561937076236e-06, "loss": 0.15, "step": 1508 }, { "epoch": 1.4076492537313432, "grad_norm": 0.6684301384117411, "learning_rate": 2.0126836587920605e-06, "loss": 0.0963, "step": 1509 }, { "epoch": 1.4085820895522387, "grad_norm": 0.6578886608789742, "learning_rate": 2.0068117945428077e-06, "loss": 0.0964, "step": 1510 }, { "epoch": 1.4095149253731343, "grad_norm": 0.6530447041506278, "learning_rate": 2.0009463569358937e-06, "loss": 0.1046, "step": 1511 }, { "epoch": 1.4104477611940298, "grad_norm": 0.623602042051879, "learning_rate": 1.995087358564938e-06, "loss": 0.0944, "step": 1512 }, { "epoch": 1.4113805970149254, "grad_norm": 0.6626314696618424, "learning_rate": 1.989234812009732e-06, "loss": 0.1068, "step": 1513 }, { "epoch": 1.412313432835821, "grad_norm": 0.7250272929930001, "learning_rate": 1.9833887298362185e-06, "loss": 0.1236, "step": 1514 }, { "epoch": 1.4132462686567164, "grad_norm": 0.6375736198326654, "learning_rate": 1.9775491245964535e-06, "loss": 0.0998, "step": 1515 }, { "epoch": 1.414179104477612, "grad_norm": 0.6557951543450196, "learning_rate": 1.971716008828593e-06, "loss": 0.1136, "step": 1516 }, { "epoch": 1.4151119402985075, "grad_norm": 0.6488980393111483, "learning_rate": 1.9658893950568574e-06, "loss": 0.1099, "step": 1517 }, { "epoch": 1.416044776119403, "grad_norm": 0.6249224268402398, "learning_rate": 1.9600692957915076e-06, "loss": 0.0951, "step": 1518 }, { "epoch": 1.4169776119402986, "grad_norm": 0.662144405176221, "learning_rate": 1.9542557235288146e-06, "loss": 0.1065, "step": 1519 }, { "epoch": 1.417910447761194, "grad_norm": 0.7003180506212673, "learning_rate": 1.9484486907510405e-06, "loss": 0.134, "step": 1520 }, { "epoch": 1.4188432835820897, "grad_norm": 0.684501896092995, "learning_rate": 1.9426482099264e-06, "loss": 0.1088, "step": 1521 }, { "epoch": 1.419776119402985, "grad_norm": 0.5931311696234539, "learning_rate": 1.936854293509043e-06, "loss": 0.0767, "step": 1522 }, { "epoch": 1.4207089552238805, "grad_norm": 0.6266603166536535, "learning_rate": 1.9310669539390266e-06, "loss": 0.0924, "step": 1523 }, { "epoch": 1.421641791044776, "grad_norm": 0.6968245505267582, "learning_rate": 1.925286203642285e-06, "loss": 0.1126, "step": 1524 }, { "epoch": 1.4225746268656716, "grad_norm": 0.7194019553888731, "learning_rate": 1.919512055030606e-06, "loss": 0.1428, "step": 1525 }, { "epoch": 1.4235074626865671, "grad_norm": 0.6715278077445609, "learning_rate": 1.913744520501602e-06, "loss": 0.1117, "step": 1526 }, { "epoch": 1.4244402985074627, "grad_norm": 0.6906481614500797, "learning_rate": 1.9079836124386865e-06, "loss": 0.1208, "step": 1527 }, { "epoch": 1.4253731343283582, "grad_norm": 0.6464169464389008, "learning_rate": 1.90222934321104e-06, "loss": 0.1013, "step": 1528 }, { "epoch": 1.4263059701492538, "grad_norm": 0.6692039848791531, "learning_rate": 1.896481725173594e-06, "loss": 0.1191, "step": 1529 }, { "epoch": 1.4272388059701493, "grad_norm": 0.6845043683672564, "learning_rate": 1.8907407706669972e-06, "loss": 0.114, "step": 1530 }, { "epoch": 1.4281716417910448, "grad_norm": 0.6888292173453365, "learning_rate": 1.8850064920175927e-06, "loss": 0.1111, "step": 1531 }, { "epoch": 1.4291044776119404, "grad_norm": 0.6484291186033274, "learning_rate": 1.8792789015373875e-06, "loss": 0.1221, "step": 1532 }, { "epoch": 1.4300373134328357, "grad_norm": 0.6565298588787157, "learning_rate": 1.873558011524032e-06, "loss": 0.0981, "step": 1533 }, { "epoch": 1.4309701492537314, "grad_norm": 0.6764915731012244, "learning_rate": 1.8678438342607846e-06, "loss": 0.1287, "step": 1534 }, { "epoch": 1.4319029850746268, "grad_norm": 0.6820575263151437, "learning_rate": 1.8621363820164978e-06, "loss": 0.1119, "step": 1535 }, { "epoch": 1.4328358208955223, "grad_norm": 0.6414014006921219, "learning_rate": 1.856435667045577e-06, "loss": 0.1179, "step": 1536 }, { "epoch": 1.4337686567164178, "grad_norm": 0.6312587705113811, "learning_rate": 1.850741701587968e-06, "loss": 0.0993, "step": 1537 }, { "epoch": 1.4347014925373134, "grad_norm": 0.674965007954544, "learning_rate": 1.8450544978691237e-06, "loss": 0.1048, "step": 1538 }, { "epoch": 1.435634328358209, "grad_norm": 0.6028396051744752, "learning_rate": 1.8393740680999783e-06, "loss": 0.0763, "step": 1539 }, { "epoch": 1.4365671641791045, "grad_norm": 0.6553368806033338, "learning_rate": 1.8337004244769225e-06, "loss": 0.105, "step": 1540 }, { "epoch": 1.4375, "grad_norm": 0.681897017906151, "learning_rate": 1.8280335791817733e-06, "loss": 0.1114, "step": 1541 }, { "epoch": 1.4384328358208955, "grad_norm": 0.6785555431374163, "learning_rate": 1.8223735443817546e-06, "loss": 0.1332, "step": 1542 }, { "epoch": 1.439365671641791, "grad_norm": 0.6318537401714203, "learning_rate": 1.8167203322294673e-06, "loss": 0.0961, "step": 1543 }, { "epoch": 1.4402985074626866, "grad_norm": 0.6511388103592578, "learning_rate": 1.811073954862862e-06, "loss": 0.0998, "step": 1544 }, { "epoch": 1.4412313432835822, "grad_norm": 0.6267159209200023, "learning_rate": 1.8054344244052153e-06, "loss": 0.0933, "step": 1545 }, { "epoch": 1.4421641791044777, "grad_norm": 0.6907396322545117, "learning_rate": 1.7998017529651042e-06, "loss": 0.1148, "step": 1546 }, { "epoch": 1.4430970149253732, "grad_norm": 0.6838056020120648, "learning_rate": 1.7941759526363739e-06, "loss": 0.1384, "step": 1547 }, { "epoch": 1.4440298507462686, "grad_norm": 0.638624108204196, "learning_rate": 1.7885570354981236e-06, "loss": 0.1018, "step": 1548 }, { "epoch": 1.4449626865671643, "grad_norm": 0.6070160278232517, "learning_rate": 1.7829450136146664e-06, "loss": 0.0823, "step": 1549 }, { "epoch": 1.4458955223880596, "grad_norm": 0.6253485428698212, "learning_rate": 1.7773398990355162e-06, "loss": 0.091, "step": 1550 }, { "epoch": 1.4468283582089552, "grad_norm": 0.7058217891061699, "learning_rate": 1.771741703795355e-06, "loss": 0.1391, "step": 1551 }, { "epoch": 1.4477611940298507, "grad_norm": 0.6303810637719384, "learning_rate": 1.7661504399140066e-06, "loss": 0.0846, "step": 1552 }, { "epoch": 1.4486940298507462, "grad_norm": 0.667169343557509, "learning_rate": 1.7605661193964169e-06, "loss": 0.122, "step": 1553 }, { "epoch": 1.4496268656716418, "grad_norm": 0.6738085781299379, "learning_rate": 1.754988754232616e-06, "loss": 0.12, "step": 1554 }, { "epoch": 1.4505597014925373, "grad_norm": 0.6659662982935662, "learning_rate": 1.749418356397708e-06, "loss": 0.1097, "step": 1555 }, { "epoch": 1.4514925373134329, "grad_norm": 0.6588716555174926, "learning_rate": 1.7438549378518331e-06, "loss": 0.1265, "step": 1556 }, { "epoch": 1.4524253731343284, "grad_norm": 0.6738477899845299, "learning_rate": 1.7382985105401485e-06, "loss": 0.1361, "step": 1557 }, { "epoch": 1.453358208955224, "grad_norm": 0.6512883012647412, "learning_rate": 1.7327490863927998e-06, "loss": 0.1085, "step": 1558 }, { "epoch": 1.4542910447761195, "grad_norm": 0.6835988091290107, "learning_rate": 1.7272066773248974e-06, "loss": 0.1086, "step": 1559 }, { "epoch": 1.455223880597015, "grad_norm": 0.6388919247417302, "learning_rate": 1.721671295236485e-06, "loss": 0.0904, "step": 1560 }, { "epoch": 1.4561567164179103, "grad_norm": 0.6569943523778403, "learning_rate": 1.7161429520125244e-06, "loss": 0.0945, "step": 1561 }, { "epoch": 1.457089552238806, "grad_norm": 0.6569546061752831, "learning_rate": 1.7106216595228636e-06, "loss": 0.1078, "step": 1562 }, { "epoch": 1.4580223880597014, "grad_norm": 0.6545117608729569, "learning_rate": 1.705107429622207e-06, "loss": 0.1005, "step": 1563 }, { "epoch": 1.458955223880597, "grad_norm": 0.6746976443306328, "learning_rate": 1.6996002741500999e-06, "loss": 0.1254, "step": 1564 }, { "epoch": 1.4598880597014925, "grad_norm": 0.6208877487520207, "learning_rate": 1.694100204930898e-06, "loss": 0.0983, "step": 1565 }, { "epoch": 1.460820895522388, "grad_norm": 0.7167086045059924, "learning_rate": 1.6886072337737418e-06, "loss": 0.1422, "step": 1566 }, { "epoch": 1.4617537313432836, "grad_norm": 0.6340772344690622, "learning_rate": 1.6831213724725282e-06, "loss": 0.0994, "step": 1567 }, { "epoch": 1.462686567164179, "grad_norm": 0.5824551420704587, "learning_rate": 1.677642632805892e-06, "loss": 0.0762, "step": 1568 }, { "epoch": 1.4636194029850746, "grad_norm": 0.6142276249312828, "learning_rate": 1.672171026537177e-06, "loss": 0.0992, "step": 1569 }, { "epoch": 1.4645522388059702, "grad_norm": 0.746179669127377, "learning_rate": 1.6667065654144105e-06, "loss": 0.1504, "step": 1570 }, { "epoch": 1.4654850746268657, "grad_norm": 0.6457039315112069, "learning_rate": 1.661249261170278e-06, "loss": 0.1135, "step": 1571 }, { "epoch": 1.4664179104477613, "grad_norm": 0.6963591594845918, "learning_rate": 1.6557991255221007e-06, "loss": 0.1316, "step": 1572 }, { "epoch": 1.4673507462686568, "grad_norm": 0.6785479226795084, "learning_rate": 1.650356170171804e-06, "loss": 0.1173, "step": 1573 }, { "epoch": 1.4682835820895521, "grad_norm": 0.672972027506251, "learning_rate": 1.6449204068058994e-06, "loss": 0.0937, "step": 1574 }, { "epoch": 1.4692164179104479, "grad_norm": 0.608652978347236, "learning_rate": 1.639491847095459e-06, "loss": 0.0913, "step": 1575 }, { "epoch": 1.4701492537313432, "grad_norm": 0.6410718362160176, "learning_rate": 1.6340705026960818e-06, "loss": 0.0993, "step": 1576 }, { "epoch": 1.4710820895522387, "grad_norm": 0.6635936884306197, "learning_rate": 1.6286563852478787e-06, "loss": 0.1126, "step": 1577 }, { "epoch": 1.4720149253731343, "grad_norm": 0.6773750672326977, "learning_rate": 1.623249506375445e-06, "loss": 0.1197, "step": 1578 }, { "epoch": 1.4729477611940298, "grad_norm": 0.6559093755938066, "learning_rate": 1.6178498776878333e-06, "loss": 0.1073, "step": 1579 }, { "epoch": 1.4738805970149254, "grad_norm": 0.6915109538380028, "learning_rate": 1.6124575107785245e-06, "loss": 0.1228, "step": 1580 }, { "epoch": 1.474813432835821, "grad_norm": 0.6491250995852719, "learning_rate": 1.6070724172254148e-06, "loss": 0.1036, "step": 1581 }, { "epoch": 1.4757462686567164, "grad_norm": 0.6746767645130203, "learning_rate": 1.6016946085907798e-06, "loss": 0.1155, "step": 1582 }, { "epoch": 1.476679104477612, "grad_norm": 0.6692576663807477, "learning_rate": 1.5963240964212556e-06, "loss": 0.1022, "step": 1583 }, { "epoch": 1.4776119402985075, "grad_norm": 0.6412042409557248, "learning_rate": 1.5909608922478108e-06, "loss": 0.1021, "step": 1584 }, { "epoch": 1.478544776119403, "grad_norm": 0.649809307134891, "learning_rate": 1.585605007585726e-06, "loss": 0.0975, "step": 1585 }, { "epoch": 1.4794776119402986, "grad_norm": 0.7036770211382395, "learning_rate": 1.5802564539345599e-06, "loss": 0.1298, "step": 1586 }, { "epoch": 1.480410447761194, "grad_norm": 0.6750039391484205, "learning_rate": 1.5749152427781367e-06, "loss": 0.1148, "step": 1587 }, { "epoch": 1.4813432835820897, "grad_norm": 0.7005196324188251, "learning_rate": 1.5695813855845149e-06, "loss": 0.1218, "step": 1588 }, { "epoch": 1.482276119402985, "grad_norm": 0.6796719722256693, "learning_rate": 1.5642548938059588e-06, "loss": 0.096, "step": 1589 }, { "epoch": 1.4832089552238805, "grad_norm": 0.6108802416921537, "learning_rate": 1.5589357788789244e-06, "loss": 0.0819, "step": 1590 }, { "epoch": 1.484141791044776, "grad_norm": 0.6862992732364115, "learning_rate": 1.5536240522240259e-06, "loss": 0.1293, "step": 1591 }, { "epoch": 1.4850746268656716, "grad_norm": 0.6939910462313265, "learning_rate": 1.5483197252460158e-06, "loss": 0.1332, "step": 1592 }, { "epoch": 1.4860074626865671, "grad_norm": 0.6804274304041169, "learning_rate": 1.543022809333755e-06, "loss": 0.1267, "step": 1593 }, { "epoch": 1.4869402985074627, "grad_norm": 0.6187611259970937, "learning_rate": 1.537733315860197e-06, "loss": 0.0846, "step": 1594 }, { "epoch": 1.4878731343283582, "grad_norm": 0.6965103135799555, "learning_rate": 1.5324512561823562e-06, "loss": 0.1318, "step": 1595 }, { "epoch": 1.4888059701492538, "grad_norm": 0.6151697548981102, "learning_rate": 1.527176641641286e-06, "loss": 0.0952, "step": 1596 }, { "epoch": 1.4897388059701493, "grad_norm": 0.6966764641051227, "learning_rate": 1.5219094835620546e-06, "loss": 0.144, "step": 1597 }, { "epoch": 1.4906716417910448, "grad_norm": 0.6699091866455897, "learning_rate": 1.5166497932537233e-06, "loss": 0.119, "step": 1598 }, { "epoch": 1.4916044776119404, "grad_norm": 0.6635739730771911, "learning_rate": 1.5113975820093129e-06, "loss": 0.1191, "step": 1599 }, { "epoch": 1.4925373134328357, "grad_norm": 0.654643709140437, "learning_rate": 1.5061528611057917e-06, "loss": 0.0982, "step": 1600 }, { "epoch": 1.4934701492537314, "grad_norm": 0.7061530748096663, "learning_rate": 1.5009156418040443e-06, "loss": 0.1277, "step": 1601 }, { "epoch": 1.4944029850746268, "grad_norm": 0.6544638900734074, "learning_rate": 1.4956859353488484e-06, "loss": 0.0965, "step": 1602 }, { "epoch": 1.4953358208955223, "grad_norm": 0.5744469621010171, "learning_rate": 1.4904637529688492e-06, "loss": 0.0781, "step": 1603 }, { "epoch": 1.4962686567164178, "grad_norm": 0.6728635079673462, "learning_rate": 1.4852491058765388e-06, "loss": 0.1276, "step": 1604 }, { "epoch": 1.4972014925373134, "grad_norm": 0.6712470901812674, "learning_rate": 1.4800420052682308e-06, "loss": 0.109, "step": 1605 }, { "epoch": 1.498134328358209, "grad_norm": 0.6673896135698573, "learning_rate": 1.4748424623240364e-06, "loss": 0.111, "step": 1606 }, { "epoch": 1.4990671641791045, "grad_norm": 0.6483139997809061, "learning_rate": 1.4696504882078361e-06, "loss": 0.0952, "step": 1607 }, { "epoch": 1.5, "grad_norm": 0.710185796231368, "learning_rate": 1.4644660940672628e-06, "loss": 0.0998, "step": 1608 }, { "epoch": 1.5009328358208955, "grad_norm": 0.6966805939502615, "learning_rate": 1.4592892910336738e-06, "loss": 0.1046, "step": 1609 }, { "epoch": 1.501865671641791, "grad_norm": 0.6403641228466286, "learning_rate": 1.4541200902221276e-06, "loss": 0.0977, "step": 1610 }, { "epoch": 1.5027985074626866, "grad_norm": 0.6890787644297524, "learning_rate": 1.4489585027313613e-06, "loss": 0.1231, "step": 1611 }, { "epoch": 1.5037313432835822, "grad_norm": 0.6782080593257965, "learning_rate": 1.4438045396437606e-06, "loss": 0.0965, "step": 1612 }, { "epoch": 1.5046641791044775, "grad_norm": 0.6136059626199658, "learning_rate": 1.4386582120253467e-06, "loss": 0.0912, "step": 1613 }, { "epoch": 1.5055970149253732, "grad_norm": 0.5981262814709045, "learning_rate": 1.433519530925745e-06, "loss": 0.0834, "step": 1614 }, { "epoch": 1.5065298507462686, "grad_norm": 0.6242937048357907, "learning_rate": 1.4283885073781628e-06, "loss": 0.0962, "step": 1615 }, { "epoch": 1.5074626865671643, "grad_norm": 0.6621456541946633, "learning_rate": 1.4232651523993635e-06, "loss": 0.1035, "step": 1616 }, { "epoch": 1.5083955223880596, "grad_norm": 0.6296606596379968, "learning_rate": 1.4181494769896487e-06, "loss": 0.0989, "step": 1617 }, { "epoch": 1.5093283582089554, "grad_norm": 0.6355375571502848, "learning_rate": 1.413041492132831e-06, "loss": 0.0978, "step": 1618 }, { "epoch": 1.5102611940298507, "grad_norm": 0.6347302676727622, "learning_rate": 1.4079412087962113e-06, "loss": 0.1082, "step": 1619 }, { "epoch": 1.5111940298507462, "grad_norm": 0.6454580185713826, "learning_rate": 1.4028486379305507e-06, "loss": 0.1201, "step": 1620 }, { "epoch": 1.5121268656716418, "grad_norm": 0.6785945090438603, "learning_rate": 1.397763790470054e-06, "loss": 0.1199, "step": 1621 }, { "epoch": 1.5130597014925373, "grad_norm": 0.6531216677533176, "learning_rate": 1.3926866773323434e-06, "loss": 0.1162, "step": 1622 }, { "epoch": 1.5139925373134329, "grad_norm": 0.6725943403965682, "learning_rate": 1.3876173094184341e-06, "loss": 0.1207, "step": 1623 }, { "epoch": 1.5149253731343284, "grad_norm": 0.6664471124575793, "learning_rate": 1.3825556976127119e-06, "loss": 0.1146, "step": 1624 }, { "epoch": 1.515858208955224, "grad_norm": 0.6470747860473635, "learning_rate": 1.3775018527829103e-06, "loss": 0.1153, "step": 1625 }, { "epoch": 1.5167910447761193, "grad_norm": 0.6250716056830056, "learning_rate": 1.3724557857800824e-06, "loss": 0.0927, "step": 1626 }, { "epoch": 1.517723880597015, "grad_norm": 0.6291450516810418, "learning_rate": 1.3674175074385866e-06, "loss": 0.0995, "step": 1627 }, { "epoch": 1.5186567164179103, "grad_norm": 0.6474065900877641, "learning_rate": 1.362387028576056e-06, "loss": 0.1076, "step": 1628 }, { "epoch": 1.519589552238806, "grad_norm": 0.7073834703513577, "learning_rate": 1.3573643599933794e-06, "loss": 0.1155, "step": 1629 }, { "epoch": 1.5205223880597014, "grad_norm": 0.6591492941191958, "learning_rate": 1.3523495124746722e-06, "loss": 0.0973, "step": 1630 }, { "epoch": 1.5214552238805972, "grad_norm": 0.644173710536433, "learning_rate": 1.3473424967872606e-06, "loss": 0.0968, "step": 1631 }, { "epoch": 1.5223880597014925, "grad_norm": 0.6044575630772596, "learning_rate": 1.3423433236816563e-06, "loss": 0.0792, "step": 1632 }, { "epoch": 1.523320895522388, "grad_norm": 0.6517118660779605, "learning_rate": 1.3373520038915271e-06, "loss": 0.1138, "step": 1633 }, { "epoch": 1.5242537313432836, "grad_norm": 0.6527882411396486, "learning_rate": 1.332368548133684e-06, "loss": 0.1083, "step": 1634 }, { "epoch": 1.525186567164179, "grad_norm": 0.6689344038956665, "learning_rate": 1.3273929671080515e-06, "loss": 0.1053, "step": 1635 }, { "epoch": 1.5261194029850746, "grad_norm": 0.656029620412247, "learning_rate": 1.322425271497646e-06, "loss": 0.0949, "step": 1636 }, { "epoch": 1.5270522388059702, "grad_norm": 0.6285515105278836, "learning_rate": 1.3174654719685537e-06, "loss": 0.0847, "step": 1637 }, { "epoch": 1.5279850746268657, "grad_norm": 0.6710362501178263, "learning_rate": 1.3125135791699084e-06, "loss": 0.121, "step": 1638 }, { "epoch": 1.528917910447761, "grad_norm": 0.6468689466612699, "learning_rate": 1.3075696037338636e-06, "loss": 0.1038, "step": 1639 }, { "epoch": 1.5298507462686568, "grad_norm": 0.6931337952318161, "learning_rate": 1.302633556275577e-06, "loss": 0.1149, "step": 1640 }, { "epoch": 1.5307835820895521, "grad_norm": 0.7153664634241063, "learning_rate": 1.2977054473931838e-06, "loss": 0.116, "step": 1641 }, { "epoch": 1.5317164179104479, "grad_norm": 0.659850994943395, "learning_rate": 1.292785287667775e-06, "loss": 0.0956, "step": 1642 }, { "epoch": 1.5326492537313432, "grad_norm": 0.6636739185108111, "learning_rate": 1.2878730876633694e-06, "loss": 0.1042, "step": 1643 }, { "epoch": 1.533582089552239, "grad_norm": 0.8525095648001452, "learning_rate": 1.2829688579269006e-06, "loss": 0.1493, "step": 1644 }, { "epoch": 1.5345149253731343, "grad_norm": 0.6465723037654459, "learning_rate": 1.27807260898819e-06, "loss": 0.0952, "step": 1645 }, { "epoch": 1.5354477611940298, "grad_norm": 0.7761600831642712, "learning_rate": 1.2731843513599179e-06, "loss": 0.1708, "step": 1646 }, { "epoch": 1.5363805970149254, "grad_norm": 0.6938076302575676, "learning_rate": 1.2683040955376109e-06, "loss": 0.1409, "step": 1647 }, { "epoch": 1.537313432835821, "grad_norm": 0.741457830281847, "learning_rate": 1.2634318519996148e-06, "loss": 0.1398, "step": 1648 }, { "epoch": 1.5382462686567164, "grad_norm": 0.6441435266491738, "learning_rate": 1.258567631207071e-06, "loss": 0.0977, "step": 1649 }, { "epoch": 1.539179104477612, "grad_norm": 0.606488542913593, "learning_rate": 1.253711443603896e-06, "loss": 0.084, "step": 1650 }, { "epoch": 1.5401119402985075, "grad_norm": 0.6257182354965015, "learning_rate": 1.2488632996167594e-06, "loss": 0.0926, "step": 1651 }, { "epoch": 1.5410447761194028, "grad_norm": 0.6406648023730214, "learning_rate": 1.244023209655057e-06, "loss": 0.1022, "step": 1652 }, { "epoch": 1.5419776119402986, "grad_norm": 0.6427586978560643, "learning_rate": 1.239191184110895e-06, "loss": 0.1049, "step": 1653 }, { "epoch": 1.542910447761194, "grad_norm": 0.7629954591188194, "learning_rate": 1.2343672333590639e-06, "loss": 0.1621, "step": 1654 }, { "epoch": 1.5438432835820897, "grad_norm": 0.6523951548563554, "learning_rate": 1.2295513677570176e-06, "loss": 0.1018, "step": 1655 }, { "epoch": 1.544776119402985, "grad_norm": 0.6166042045116035, "learning_rate": 1.2247435976448474e-06, "loss": 0.0955, "step": 1656 }, { "epoch": 1.5457089552238807, "grad_norm": 0.6660072112277334, "learning_rate": 1.2199439333452667e-06, "loss": 0.1274, "step": 1657 }, { "epoch": 1.546641791044776, "grad_norm": 0.6853836668973566, "learning_rate": 1.2151523851635839e-06, "loss": 0.12, "step": 1658 }, { "epoch": 1.5475746268656716, "grad_norm": 0.6696740157758166, "learning_rate": 1.2103689633876781e-06, "loss": 0.1116, "step": 1659 }, { "epoch": 1.5485074626865671, "grad_norm": 0.6710327230212384, "learning_rate": 1.2055936782879845e-06, "loss": 0.103, "step": 1660 }, { "epoch": 1.5494402985074627, "grad_norm": 0.652565586465193, "learning_rate": 1.2008265401174673e-06, "loss": 0.109, "step": 1661 }, { "epoch": 1.5503731343283582, "grad_norm": 0.657012239345551, "learning_rate": 1.1960675591115966e-06, "loss": 0.1189, "step": 1662 }, { "epoch": 1.5513059701492538, "grad_norm": 0.6510786168297996, "learning_rate": 1.1913167454883306e-06, "loss": 0.1102, "step": 1663 }, { "epoch": 1.5522388059701493, "grad_norm": 0.7509300321363932, "learning_rate": 1.186574109448091e-06, "loss": 0.1662, "step": 1664 }, { "epoch": 1.5531716417910446, "grad_norm": 0.6170217169104001, "learning_rate": 1.1818396611737381e-06, "loss": 0.0864, "step": 1665 }, { "epoch": 1.5541044776119404, "grad_norm": 0.6727157843027806, "learning_rate": 1.1771134108305572e-06, "loss": 0.119, "step": 1666 }, { "epoch": 1.5550373134328357, "grad_norm": 0.6752840569308515, "learning_rate": 1.1723953685662287e-06, "loss": 0.1228, "step": 1667 }, { "epoch": 1.5559701492537314, "grad_norm": 0.636490976732929, "learning_rate": 1.1676855445108114e-06, "loss": 0.0957, "step": 1668 }, { "epoch": 1.5569029850746268, "grad_norm": 0.651016849257782, "learning_rate": 1.1629839487767198e-06, "loss": 0.1202, "step": 1669 }, { "epoch": 1.5578358208955225, "grad_norm": 0.634778890603816, "learning_rate": 1.1582905914586961e-06, "loss": 0.0984, "step": 1670 }, { "epoch": 1.5587686567164178, "grad_norm": 0.6586183136882618, "learning_rate": 1.1536054826338005e-06, "loss": 0.104, "step": 1671 }, { "epoch": 1.5597014925373134, "grad_norm": 0.6474353519957355, "learning_rate": 1.148928632361378e-06, "loss": 0.1014, "step": 1672 }, { "epoch": 1.560634328358209, "grad_norm": 0.6553648781001274, "learning_rate": 1.1442600506830443e-06, "loss": 0.1102, "step": 1673 }, { "epoch": 1.5615671641791045, "grad_norm": 0.6866140826556648, "learning_rate": 1.1395997476226612e-06, "loss": 0.1161, "step": 1674 }, { "epoch": 1.5625, "grad_norm": 0.6338777739556756, "learning_rate": 1.134947733186315e-06, "loss": 0.0952, "step": 1675 }, { "epoch": 1.5634328358208955, "grad_norm": 0.6180245412280053, "learning_rate": 1.1303040173622977e-06, "loss": 0.0912, "step": 1676 }, { "epoch": 1.564365671641791, "grad_norm": 0.6901893620166646, "learning_rate": 1.1256686101210818e-06, "loss": 0.1174, "step": 1677 }, { "epoch": 1.5652985074626866, "grad_norm": 0.6424611371932021, "learning_rate": 1.1210415214152976e-06, "loss": 0.1094, "step": 1678 }, { "epoch": 1.5662313432835822, "grad_norm": 0.6734425951970067, "learning_rate": 1.1164227611797202e-06, "loss": 0.1099, "step": 1679 }, { "epoch": 1.5671641791044775, "grad_norm": 0.682818218847526, "learning_rate": 1.1118123393312397e-06, "loss": 0.1307, "step": 1680 }, { "epoch": 1.5680970149253732, "grad_norm": 0.6300078962882174, "learning_rate": 1.1072102657688434e-06, "loss": 0.0912, "step": 1681 }, { "epoch": 1.5690298507462686, "grad_norm": 0.6816592840875468, "learning_rate": 1.1026165503735959e-06, "loss": 0.1132, "step": 1682 }, { "epoch": 1.5699626865671643, "grad_norm": 0.6747739767602066, "learning_rate": 1.0980312030086104e-06, "loss": 0.1053, "step": 1683 }, { "epoch": 1.5708955223880596, "grad_norm": 0.6629184340140629, "learning_rate": 1.0934542335190418e-06, "loss": 0.0935, "step": 1684 }, { "epoch": 1.5718283582089554, "grad_norm": 0.6602033200029149, "learning_rate": 1.0888856517320478e-06, "loss": 0.1095, "step": 1685 }, { "epoch": 1.5727611940298507, "grad_norm": 0.7161452173123822, "learning_rate": 1.0843254674567832e-06, "loss": 0.0974, "step": 1686 }, { "epoch": 1.5736940298507462, "grad_norm": 0.6979641974278543, "learning_rate": 1.079773690484372e-06, "loss": 0.126, "step": 1687 }, { "epoch": 1.5746268656716418, "grad_norm": 0.6366246421446787, "learning_rate": 1.075230330587884e-06, "loss": 0.1026, "step": 1688 }, { "epoch": 1.5755597014925373, "grad_norm": 0.6510270636359718, "learning_rate": 1.07069539752232e-06, "loss": 0.1088, "step": 1689 }, { "epoch": 1.5764925373134329, "grad_norm": 0.6079252623564186, "learning_rate": 1.0661689010245868e-06, "loss": 0.0859, "step": 1690 }, { "epoch": 1.5774253731343284, "grad_norm": 0.6673688422979873, "learning_rate": 1.0616508508134737e-06, "loss": 0.1362, "step": 1691 }, { "epoch": 1.578358208955224, "grad_norm": 0.6278541240776503, "learning_rate": 1.0571412565896406e-06, "loss": 0.0866, "step": 1692 }, { "epoch": 1.5792910447761193, "grad_norm": 0.6721145366341891, "learning_rate": 1.052640128035587e-06, "loss": 0.1272, "step": 1693 }, { "epoch": 1.580223880597015, "grad_norm": 0.5882039867793266, "learning_rate": 1.048147474815639e-06, "loss": 0.0873, "step": 1694 }, { "epoch": 1.5811567164179103, "grad_norm": 0.6459112809984757, "learning_rate": 1.0436633065759243e-06, "loss": 0.0925, "step": 1695 }, { "epoch": 1.582089552238806, "grad_norm": 0.7214225951975882, "learning_rate": 1.0391876329443534e-06, "loss": 0.1532, "step": 1696 }, { "epoch": 1.5830223880597014, "grad_norm": 0.7080045902307683, "learning_rate": 1.0347204635305963e-06, "loss": 0.1068, "step": 1697 }, { "epoch": 1.5839552238805972, "grad_norm": 0.5952401295295793, "learning_rate": 1.030261807926063e-06, "loss": 0.0799, "step": 1698 }, { "epoch": 1.5848880597014925, "grad_norm": 0.6766510150467787, "learning_rate": 1.0258116757038862e-06, "loss": 0.108, "step": 1699 }, { "epoch": 1.585820895522388, "grad_norm": 0.6568326050177908, "learning_rate": 1.0213700764188978e-06, "loss": 0.1158, "step": 1700 }, { "epoch": 1.5867537313432836, "grad_norm": 0.6479375644089618, "learning_rate": 1.0169370196076073e-06, "loss": 0.095, "step": 1701 }, { "epoch": 1.587686567164179, "grad_norm": 0.6142445218300894, "learning_rate": 1.0125125147881842e-06, "loss": 0.085, "step": 1702 }, { "epoch": 1.5886194029850746, "grad_norm": 0.6326510172505997, "learning_rate": 1.0080965714604368e-06, "loss": 0.0991, "step": 1703 }, { "epoch": 1.5895522388059702, "grad_norm": 0.6468896942859528, "learning_rate": 1.0036891991057863e-06, "loss": 0.1157, "step": 1704 }, { "epoch": 1.5904850746268657, "grad_norm": 0.6585255689564113, "learning_rate": 9.992904071872567e-07, "loss": 0.1085, "step": 1705 }, { "epoch": 1.591417910447761, "grad_norm": 0.62463976340872, "learning_rate": 9.949002051494467e-07, "loss": 0.106, "step": 1706 }, { "epoch": 1.5923507462686568, "grad_norm": 0.7048178275131809, "learning_rate": 9.90518602418512e-07, "loss": 0.134, "step": 1707 }, { "epoch": 1.5932835820895521, "grad_norm": 0.6260166916161335, "learning_rate": 9.861456084021448e-07, "loss": 0.0864, "step": 1708 }, { "epoch": 1.5942164179104479, "grad_norm": 0.5949493524122189, "learning_rate": 9.81781232489556e-07, "loss": 0.0843, "step": 1709 }, { "epoch": 1.5951492537313432, "grad_norm": 0.6198705033835488, "learning_rate": 9.774254840514474e-07, "loss": 0.0932, "step": 1710 }, { "epoch": 1.596082089552239, "grad_norm": 0.6865920777724672, "learning_rate": 9.730783724400005e-07, "loss": 0.1223, "step": 1711 }, { "epoch": 1.5970149253731343, "grad_norm": 0.66946465913934, "learning_rate": 9.687399069888515e-07, "loss": 0.1165, "step": 1712 }, { "epoch": 1.5979477611940298, "grad_norm": 0.6468118415206752, "learning_rate": 9.644100970130743e-07, "loss": 0.0872, "step": 1713 }, { "epoch": 1.5988805970149254, "grad_norm": 0.6964353891944313, "learning_rate": 9.600889518091572e-07, "loss": 0.1053, "step": 1714 }, { "epoch": 1.599813432835821, "grad_norm": 0.6251491532674048, "learning_rate": 9.557764806549852e-07, "loss": 0.0953, "step": 1715 }, { "epoch": 1.6007462686567164, "grad_norm": 0.6860292294775532, "learning_rate": 9.514726928098189e-07, "loss": 0.1165, "step": 1716 }, { "epoch": 1.601679104477612, "grad_norm": 0.630547141736314, "learning_rate": 9.471775975142739e-07, "loss": 0.0967, "step": 1717 }, { "epoch": 1.6026119402985075, "grad_norm": 0.6999857798270286, "learning_rate": 9.428912039903043e-07, "loss": 0.1327, "step": 1718 }, { "epoch": 1.6035447761194028, "grad_norm": 0.675086097330575, "learning_rate": 9.38613521441179e-07, "loss": 0.1069, "step": 1719 }, { "epoch": 1.6044776119402986, "grad_norm": 0.6358540782823126, "learning_rate": 9.343445590514655e-07, "loss": 0.096, "step": 1720 }, { "epoch": 1.605410447761194, "grad_norm": 0.7016577574921877, "learning_rate": 9.300843259870063e-07, "loss": 0.1063, "step": 1721 }, { "epoch": 1.6063432835820897, "grad_norm": 0.6648001658687313, "learning_rate": 9.258328313949039e-07, "loss": 0.1124, "step": 1722 }, { "epoch": 1.607276119402985, "grad_norm": 0.6872002416357819, "learning_rate": 9.215900844034953e-07, "loss": 0.1444, "step": 1723 }, { "epoch": 1.6082089552238807, "grad_norm": 0.6373416958896619, "learning_rate": 9.173560941223359e-07, "loss": 0.1019, "step": 1724 }, { "epoch": 1.609141791044776, "grad_norm": 0.679495685356042, "learning_rate": 9.131308696421825e-07, "loss": 0.1063, "step": 1725 }, { "epoch": 1.6100746268656716, "grad_norm": 0.6714034854230727, "learning_rate": 9.089144200349687e-07, "loss": 0.1103, "step": 1726 }, { "epoch": 1.6110074626865671, "grad_norm": 0.6639539741160575, "learning_rate": 9.047067543537891e-07, "loss": 0.1009, "step": 1727 }, { "epoch": 1.6119402985074627, "grad_norm": 0.6324806742007469, "learning_rate": 9.005078816328772e-07, "loss": 0.0856, "step": 1728 }, { "epoch": 1.6128731343283582, "grad_norm": 0.6558325145118701, "learning_rate": 8.963178108875886e-07, "loss": 0.0996, "step": 1729 }, { "epoch": 1.6138059701492538, "grad_norm": 0.6451027233765737, "learning_rate": 8.92136551114377e-07, "loss": 0.1087, "step": 1730 }, { "epoch": 1.6147388059701493, "grad_norm": 0.6175408616982773, "learning_rate": 8.879641112907822e-07, "loss": 0.0892, "step": 1731 }, { "epoch": 1.6156716417910446, "grad_norm": 0.6370533675713849, "learning_rate": 8.838005003754046e-07, "loss": 0.1021, "step": 1732 }, { "epoch": 1.6166044776119404, "grad_norm": 0.6689874181288303, "learning_rate": 8.796457273078884e-07, "loss": 0.1298, "step": 1733 }, { "epoch": 1.6175373134328357, "grad_norm": 0.6686913688968573, "learning_rate": 8.754998010089033e-07, "loss": 0.1163, "step": 1734 }, { "epoch": 1.6184701492537314, "grad_norm": 0.6559728578682665, "learning_rate": 8.713627303801237e-07, "loss": 0.1145, "step": 1735 }, { "epoch": 1.6194029850746268, "grad_norm": 0.660898305222491, "learning_rate": 8.672345243042068e-07, "loss": 0.1033, "step": 1736 }, { "epoch": 1.6203358208955225, "grad_norm": 0.7286880976687977, "learning_rate": 8.631151916447833e-07, "loss": 0.154, "step": 1737 }, { "epoch": 1.6212686567164178, "grad_norm": 0.6788094073391633, "learning_rate": 8.590047412464247e-07, "loss": 0.1152, "step": 1738 }, { "epoch": 1.6222014925373134, "grad_norm": 0.6319907816060438, "learning_rate": 8.549031819346365e-07, "loss": 0.0964, "step": 1739 }, { "epoch": 1.623134328358209, "grad_norm": 0.597616655222107, "learning_rate": 8.50810522515833e-07, "loss": 0.0877, "step": 1740 }, { "epoch": 1.6240671641791045, "grad_norm": 0.6968311281689527, "learning_rate": 8.467267717773198e-07, "loss": 0.129, "step": 1741 }, { "epoch": 1.625, "grad_norm": 0.6239244038211161, "learning_rate": 8.426519384872733e-07, "loss": 0.0928, "step": 1742 }, { "epoch": 1.6259328358208955, "grad_norm": 0.6268189447709845, "learning_rate": 8.385860313947269e-07, "loss": 0.0897, "step": 1743 }, { "epoch": 1.626865671641791, "grad_norm": 0.6372561264176504, "learning_rate": 8.345290592295429e-07, "loss": 0.0854, "step": 1744 }, { "epoch": 1.6277985074626866, "grad_norm": 0.7158784043667057, "learning_rate": 8.304810307024041e-07, "loss": 0.1416, "step": 1745 }, { "epoch": 1.6287313432835822, "grad_norm": 0.6957002017475541, "learning_rate": 8.264419545047892e-07, "loss": 0.1532, "step": 1746 }, { "epoch": 1.6296641791044775, "grad_norm": 0.7098387039652623, "learning_rate": 8.224118393089553e-07, "loss": 0.12, "step": 1747 }, { "epoch": 1.6305970149253732, "grad_norm": 0.6455660950769614, "learning_rate": 8.183906937679214e-07, "loss": 0.1006, "step": 1748 }, { "epoch": 1.6315298507462686, "grad_norm": 0.6191033862870169, "learning_rate": 8.143785265154436e-07, "loss": 0.0996, "step": 1749 }, { "epoch": 1.6324626865671643, "grad_norm": 0.6675443647600169, "learning_rate": 8.103753461660046e-07, "loss": 0.0907, "step": 1750 }, { "epoch": 1.6333955223880596, "grad_norm": 0.6505829526922129, "learning_rate": 8.063811613147888e-07, "loss": 0.1014, "step": 1751 }, { "epoch": 1.6343283582089554, "grad_norm": 0.7250430198514108, "learning_rate": 8.02395980537668e-07, "loss": 0.1502, "step": 1752 }, { "epoch": 1.6352611940298507, "grad_norm": 0.6749372328576574, "learning_rate": 7.984198123911819e-07, "loss": 0.1189, "step": 1753 }, { "epoch": 1.6361940298507462, "grad_norm": 0.6255258746267677, "learning_rate": 7.944526654125184e-07, "loss": 0.1098, "step": 1754 }, { "epoch": 1.6371268656716418, "grad_norm": 0.7031537823049455, "learning_rate": 7.904945481194959e-07, "loss": 0.1144, "step": 1755 }, { "epoch": 1.6380597014925373, "grad_norm": 0.6202944380350589, "learning_rate": 7.865454690105472e-07, "loss": 0.0931, "step": 1756 }, { "epoch": 1.6389925373134329, "grad_norm": 0.5934418528266737, "learning_rate": 7.826054365646951e-07, "loss": 0.0793, "step": 1757 }, { "epoch": 1.6399253731343284, "grad_norm": 0.6790447734059825, "learning_rate": 7.786744592415429e-07, "loss": 0.1279, "step": 1758 }, { "epoch": 1.640858208955224, "grad_norm": 0.6602237060660385, "learning_rate": 7.747525454812488e-07, "loss": 0.1009, "step": 1759 }, { "epoch": 1.6417910447761193, "grad_norm": 0.6405868025333883, "learning_rate": 7.708397037045129e-07, "loss": 0.0897, "step": 1760 }, { "epoch": 1.642723880597015, "grad_norm": 0.6562011953724507, "learning_rate": 7.669359423125555e-07, "loss": 0.1107, "step": 1761 }, { "epoch": 1.6436567164179103, "grad_norm": 0.6608470317797746, "learning_rate": 7.630412696871015e-07, "loss": 0.099, "step": 1762 }, { "epoch": 1.644589552238806, "grad_norm": 0.7037478523123497, "learning_rate": 7.591556941903605e-07, "loss": 0.1248, "step": 1763 }, { "epoch": 1.6455223880597014, "grad_norm": 0.678634117511265, "learning_rate": 7.552792241650081e-07, "loss": 0.1165, "step": 1764 }, { "epoch": 1.6464552238805972, "grad_norm": 0.6441966992999458, "learning_rate": 7.514118679341737e-07, "loss": 0.0997, "step": 1765 }, { "epoch": 1.6473880597014925, "grad_norm": 0.6317555272698577, "learning_rate": 7.475536338014156e-07, "loss": 0.1066, "step": 1766 }, { "epoch": 1.648320895522388, "grad_norm": 0.6818951812390617, "learning_rate": 7.437045300507068e-07, "loss": 0.1252, "step": 1767 }, { "epoch": 1.6492537313432836, "grad_norm": 0.6358940103795616, "learning_rate": 7.398645649464175e-07, "loss": 0.1122, "step": 1768 }, { "epoch": 1.650186567164179, "grad_norm": 0.6902574799084824, "learning_rate": 7.360337467332968e-07, "loss": 0.117, "step": 1769 }, { "epoch": 1.6511194029850746, "grad_norm": 0.612982709984487, "learning_rate": 7.322120836364504e-07, "loss": 0.0873, "step": 1770 }, { "epoch": 1.6520522388059702, "grad_norm": 0.6626829207002936, "learning_rate": 7.283995838613323e-07, "loss": 0.1059, "step": 1771 }, { "epoch": 1.6529850746268657, "grad_norm": 0.6501539151399879, "learning_rate": 7.245962555937192e-07, "loss": 0.0917, "step": 1772 }, { "epoch": 1.653917910447761, "grad_norm": 0.5852053129475824, "learning_rate": 7.208021069996962e-07, "loss": 0.0901, "step": 1773 }, { "epoch": 1.6548507462686568, "grad_norm": 0.6862063628793922, "learning_rate": 7.170171462256404e-07, "loss": 0.1013, "step": 1774 }, { "epoch": 1.6557835820895521, "grad_norm": 0.6429247076772622, "learning_rate": 7.132413813982003e-07, "loss": 0.1002, "step": 1775 }, { "epoch": 1.6567164179104479, "grad_norm": 0.6528632391692349, "learning_rate": 7.094748206242797e-07, "loss": 0.1034, "step": 1776 }, { "epoch": 1.6576492537313432, "grad_norm": 0.6485610532152462, "learning_rate": 7.057174719910198e-07, "loss": 0.0857, "step": 1777 }, { "epoch": 1.658582089552239, "grad_norm": 0.6578058287956414, "learning_rate": 7.019693435657848e-07, "loss": 0.1018, "step": 1778 }, { "epoch": 1.6595149253731343, "grad_norm": 0.6776269154954033, "learning_rate": 6.982304433961406e-07, "loss": 0.1152, "step": 1779 }, { "epoch": 1.6604477611940298, "grad_norm": 0.5842848765806773, "learning_rate": 6.945007795098402e-07, "loss": 0.0813, "step": 1780 }, { "epoch": 1.6613805970149254, "grad_norm": 0.6279513025351902, "learning_rate": 6.907803599148049e-07, "loss": 0.0996, "step": 1781 }, { "epoch": 1.662313432835821, "grad_norm": 0.7225211276619226, "learning_rate": 6.870691925991085e-07, "loss": 0.1333, "step": 1782 }, { "epoch": 1.6632462686567164, "grad_norm": 0.6630100942442801, "learning_rate": 6.833672855309565e-07, "loss": 0.1099, "step": 1783 }, { "epoch": 1.664179104477612, "grad_norm": 0.6511703633471069, "learning_rate": 6.796746466586757e-07, "loss": 0.1113, "step": 1784 }, { "epoch": 1.6651119402985075, "grad_norm": 0.6864285152790113, "learning_rate": 6.759912839106908e-07, "loss": 0.1366, "step": 1785 }, { "epoch": 1.6660447761194028, "grad_norm": 0.6158760678897395, "learning_rate": 6.723172051955102e-07, "loss": 0.0821, "step": 1786 }, { "epoch": 1.6669776119402986, "grad_norm": 0.613395252176143, "learning_rate": 6.686524184017102e-07, "loss": 0.0931, "step": 1787 }, { "epoch": 1.667910447761194, "grad_norm": 0.6444312073344287, "learning_rate": 6.649969313979149e-07, "loss": 0.1032, "step": 1788 }, { "epoch": 1.6688432835820897, "grad_norm": 0.7030956812056445, "learning_rate": 6.613507520327811e-07, "loss": 0.1114, "step": 1789 }, { "epoch": 1.669776119402985, "grad_norm": 0.7062815982761138, "learning_rate": 6.577138881349804e-07, "loss": 0.1175, "step": 1790 }, { "epoch": 1.6707089552238807, "grad_norm": 0.6380822018243477, "learning_rate": 6.540863475131853e-07, "loss": 0.1014, "step": 1791 }, { "epoch": 1.671641791044776, "grad_norm": 0.6608989722176818, "learning_rate": 6.50468137956049e-07, "loss": 0.1206, "step": 1792 }, { "epoch": 1.6725746268656716, "grad_norm": 0.6322511368767614, "learning_rate": 6.468592672321905e-07, "loss": 0.106, "step": 1793 }, { "epoch": 1.6735074626865671, "grad_norm": 0.5828819952831086, "learning_rate": 6.432597430901782e-07, "loss": 0.0809, "step": 1794 }, { "epoch": 1.6744402985074627, "grad_norm": 0.710422852445745, "learning_rate": 6.396695732585123e-07, "loss": 0.1463, "step": 1795 }, { "epoch": 1.6753731343283582, "grad_norm": 0.6293671910240368, "learning_rate": 6.360887654456066e-07, "loss": 0.0919, "step": 1796 }, { "epoch": 1.6763059701492538, "grad_norm": 0.6488106340111748, "learning_rate": 6.32517327339775e-07, "loss": 0.0918, "step": 1797 }, { "epoch": 1.6772388059701493, "grad_norm": 0.6679453280768429, "learning_rate": 6.289552666092153e-07, "loss": 0.1323, "step": 1798 }, { "epoch": 1.6781716417910446, "grad_norm": 0.6612701418373605, "learning_rate": 6.254025909019889e-07, "loss": 0.1105, "step": 1799 }, { "epoch": 1.6791044776119404, "grad_norm": 0.6588789414969743, "learning_rate": 6.218593078460084e-07, "loss": 0.1046, "step": 1800 }, { "epoch": 1.6800373134328357, "grad_norm": 0.6526779827080932, "learning_rate": 6.183254250490195e-07, "loss": 0.1007, "step": 1801 }, { "epoch": 1.6809701492537314, "grad_norm": 0.6913371115141022, "learning_rate": 6.14800950098583e-07, "loss": 0.1326, "step": 1802 }, { "epoch": 1.6819029850746268, "grad_norm": 0.6501854521890046, "learning_rate": 6.112858905620622e-07, "loss": 0.1023, "step": 1803 }, { "epoch": 1.6828358208955225, "grad_norm": 0.6773162706997531, "learning_rate": 6.077802539866023e-07, "loss": 0.1125, "step": 1804 }, { "epoch": 1.6837686567164178, "grad_norm": 0.693246571820606, "learning_rate": 6.042840478991185e-07, "loss": 0.1149, "step": 1805 }, { "epoch": 1.6847014925373134, "grad_norm": 0.645983582563893, "learning_rate": 6.007972798062783e-07, "loss": 0.1014, "step": 1806 }, { "epoch": 1.685634328358209, "grad_norm": 0.6721271865078816, "learning_rate": 5.973199571944843e-07, "loss": 0.1119, "step": 1807 }, { "epoch": 1.6865671641791045, "grad_norm": 0.6834225371662415, "learning_rate": 5.938520875298587e-07, "loss": 0.1187, "step": 1808 }, { "epoch": 1.6875, "grad_norm": 0.6251253504982981, "learning_rate": 5.903936782582253e-07, "loss": 0.1003, "step": 1809 }, { "epoch": 1.6884328358208955, "grad_norm": 0.6558448840729171, "learning_rate": 5.869447368050995e-07, "loss": 0.1129, "step": 1810 }, { "epoch": 1.689365671641791, "grad_norm": 0.6350414048206922, "learning_rate": 5.835052705756661e-07, "loss": 0.0837, "step": 1811 }, { "epoch": 1.6902985074626866, "grad_norm": 0.6155524664695903, "learning_rate": 5.80075286954766e-07, "loss": 0.095, "step": 1812 }, { "epoch": 1.6912313432835822, "grad_norm": 0.6799689634311253, "learning_rate": 5.766547933068806e-07, "loss": 0.1108, "step": 1813 }, { "epoch": 1.6921641791044775, "grad_norm": 0.6865559360020815, "learning_rate": 5.732437969761156e-07, "loss": 0.1127, "step": 1814 }, { "epoch": 1.6930970149253732, "grad_norm": 0.6394902969415759, "learning_rate": 5.698423052861835e-07, "loss": 0.1043, "step": 1815 }, { "epoch": 1.6940298507462686, "grad_norm": 0.6761352316857017, "learning_rate": 5.664503255403925e-07, "loss": 0.1127, "step": 1816 }, { "epoch": 1.6949626865671643, "grad_norm": 0.654716525527515, "learning_rate": 5.630678650216236e-07, "loss": 0.0945, "step": 1817 }, { "epoch": 1.6958955223880596, "grad_norm": 0.6840716258734394, "learning_rate": 5.596949309923233e-07, "loss": 0.1129, "step": 1818 }, { "epoch": 1.6968283582089554, "grad_norm": 0.6826644366911423, "learning_rate": 5.56331530694481e-07, "loss": 0.1116, "step": 1819 }, { "epoch": 1.6977611940298507, "grad_norm": 0.632045708878701, "learning_rate": 5.529776713496182e-07, "loss": 0.0894, "step": 1820 }, { "epoch": 1.6986940298507462, "grad_norm": 0.6512400349945926, "learning_rate": 5.496333601587711e-07, "loss": 0.1055, "step": 1821 }, { "epoch": 1.6996268656716418, "grad_norm": 0.7017357539708079, "learning_rate": 5.462986043024726e-07, "loss": 0.1294, "step": 1822 }, { "epoch": 1.7005597014925373, "grad_norm": 0.6878759999662828, "learning_rate": 5.429734109407426e-07, "loss": 0.1208, "step": 1823 }, { "epoch": 1.7014925373134329, "grad_norm": 0.6502363280665532, "learning_rate": 5.396577872130676e-07, "loss": 0.1041, "step": 1824 }, { "epoch": 1.7024253731343284, "grad_norm": 0.6509989696727869, "learning_rate": 5.363517402383878e-07, "loss": 0.1049, "step": 1825 }, { "epoch": 1.703358208955224, "grad_norm": 0.6750889536314447, "learning_rate": 5.330552771150821e-07, "loss": 0.0951, "step": 1826 }, { "epoch": 1.7042910447761193, "grad_norm": 0.6056095358173967, "learning_rate": 5.297684049209511e-07, "loss": 0.0932, "step": 1827 }, { "epoch": 1.705223880597015, "grad_norm": 0.6653663331768904, "learning_rate": 5.264911307132009e-07, "loss": 0.111, "step": 1828 }, { "epoch": 1.7061567164179103, "grad_norm": 0.6908194392627103, "learning_rate": 5.232234615284337e-07, "loss": 0.129, "step": 1829 }, { "epoch": 1.707089552238806, "grad_norm": 0.6520058417207232, "learning_rate": 5.19965404382628e-07, "loss": 0.1011, "step": 1830 }, { "epoch": 1.7080223880597014, "grad_norm": 0.6982061622671883, "learning_rate": 5.167169662711202e-07, "loss": 0.1327, "step": 1831 }, { "epoch": 1.7089552238805972, "grad_norm": 0.6023389265157328, "learning_rate": 5.134781541685996e-07, "loss": 0.0878, "step": 1832 }, { "epoch": 1.7098880597014925, "grad_norm": 0.6954672775612969, "learning_rate": 5.102489750290834e-07, "loss": 0.1124, "step": 1833 }, { "epoch": 1.710820895522388, "grad_norm": 0.645204475107175, "learning_rate": 5.070294357859096e-07, "loss": 0.096, "step": 1834 }, { "epoch": 1.7117537313432836, "grad_norm": 0.6621641473782897, "learning_rate": 5.03819543351714e-07, "loss": 0.1038, "step": 1835 }, { "epoch": 1.712686567164179, "grad_norm": 0.6778194832762802, "learning_rate": 5.006193046184238e-07, "loss": 0.1061, "step": 1836 }, { "epoch": 1.7136194029850746, "grad_norm": 0.7022462684641781, "learning_rate": 4.974287264572363e-07, "loss": 0.1175, "step": 1837 }, { "epoch": 1.7145522388059702, "grad_norm": 0.6720297431240653, "learning_rate": 4.942478157186087e-07, "loss": 0.1001, "step": 1838 }, { "epoch": 1.7154850746268657, "grad_norm": 0.6687229984626992, "learning_rate": 4.910765792322397e-07, "loss": 0.1032, "step": 1839 }, { "epoch": 1.716417910447761, "grad_norm": 0.6796471926559255, "learning_rate": 4.879150238070585e-07, "loss": 0.118, "step": 1840 }, { "epoch": 1.7173507462686568, "grad_norm": 0.705576428339069, "learning_rate": 4.847631562312049e-07, "loss": 0.1394, "step": 1841 }, { "epoch": 1.7182835820895521, "grad_norm": 0.6378897906127744, "learning_rate": 4.816209832720214e-07, "loss": 0.096, "step": 1842 }, { "epoch": 1.7192164179104479, "grad_norm": 0.6517627385455933, "learning_rate": 4.78488511676034e-07, "loss": 0.0995, "step": 1843 }, { "epoch": 1.7201492537313432, "grad_norm": 0.6407259597667398, "learning_rate": 4.753657481689372e-07, "loss": 0.0919, "step": 1844 }, { "epoch": 1.721082089552239, "grad_norm": 0.6346921306589071, "learning_rate": 4.7225269945558483e-07, "loss": 0.0939, "step": 1845 }, { "epoch": 1.7220149253731343, "grad_norm": 0.6416696445713742, "learning_rate": 4.691493722199697e-07, "loss": 0.1032, "step": 1846 }, { "epoch": 1.7229477611940298, "grad_norm": 0.6366669165436657, "learning_rate": 4.6605577312521354e-07, "loss": 0.1111, "step": 1847 }, { "epoch": 1.7238805970149254, "grad_norm": 0.6175232981324438, "learning_rate": 4.6297190881354816e-07, "loss": 0.0796, "step": 1848 }, { "epoch": 1.724813432835821, "grad_norm": 0.6284010746488677, "learning_rate": 4.598977859063064e-07, "loss": 0.0823, "step": 1849 }, { "epoch": 1.7257462686567164, "grad_norm": 0.6801492543017185, "learning_rate": 4.5683341100390464e-07, "loss": 0.1312, "step": 1850 }, { "epoch": 1.726679104477612, "grad_norm": 0.6656680890418982, "learning_rate": 4.537787906858293e-07, "loss": 0.1115, "step": 1851 }, { "epoch": 1.7276119402985075, "grad_norm": 0.5933643932726181, "learning_rate": 4.507339315106235e-07, "loss": 0.0792, "step": 1852 }, { "epoch": 1.7285447761194028, "grad_norm": 0.644730971203101, "learning_rate": 4.476988400158716e-07, "loss": 0.1027, "step": 1853 }, { "epoch": 1.7294776119402986, "grad_norm": 0.6497949830357228, "learning_rate": 4.446735227181853e-07, "loss": 0.1026, "step": 1854 }, { "epoch": 1.730410447761194, "grad_norm": 0.605976034747203, "learning_rate": 4.4165798611319145e-07, "loss": 0.0834, "step": 1855 }, { "epoch": 1.7313432835820897, "grad_norm": 0.6740198994688358, "learning_rate": 4.386522366755169e-07, "loss": 0.1113, "step": 1856 }, { "epoch": 1.732276119402985, "grad_norm": 0.6625221843573914, "learning_rate": 4.3565628085877275e-07, "loss": 0.0996, "step": 1857 }, { "epoch": 1.7332089552238807, "grad_norm": 0.6458234253314198, "learning_rate": 4.326701250955445e-07, "loss": 0.1141, "step": 1858 }, { "epoch": 1.734141791044776, "grad_norm": 0.6343995114606871, "learning_rate": 4.296937757973757e-07, "loss": 0.0902, "step": 1859 }, { "epoch": 1.7350746268656716, "grad_norm": 0.6382900750121169, "learning_rate": 4.267272393547539e-07, "loss": 0.1184, "step": 1860 }, { "epoch": 1.7360074626865671, "grad_norm": 0.6760759147499171, "learning_rate": 4.2377052213709634e-07, "loss": 0.1168, "step": 1861 }, { "epoch": 1.7369402985074627, "grad_norm": 0.6441842858842316, "learning_rate": 4.208236304927404e-07, "loss": 0.1114, "step": 1862 }, { "epoch": 1.7378731343283582, "grad_norm": 0.6209706354071617, "learning_rate": 4.178865707489249e-07, "loss": 0.0855, "step": 1863 }, { "epoch": 1.7388059701492538, "grad_norm": 0.6602198568902595, "learning_rate": 4.149593492117793e-07, "loss": 0.1082, "step": 1864 }, { "epoch": 1.7397388059701493, "grad_norm": 0.6634769859129465, "learning_rate": 4.120419721663099e-07, "loss": 0.0928, "step": 1865 }, { "epoch": 1.7406716417910446, "grad_norm": 0.6666920733909253, "learning_rate": 4.091344458763863e-07, "loss": 0.1187, "step": 1866 }, { "epoch": 1.7416044776119404, "grad_norm": 0.6122233453731917, "learning_rate": 4.062367765847258e-07, "loss": 0.0965, "step": 1867 }, { "epoch": 1.7425373134328357, "grad_norm": 0.679010024238431, "learning_rate": 4.03348970512884e-07, "loss": 0.1286, "step": 1868 }, { "epoch": 1.7434701492537314, "grad_norm": 0.6918348474530076, "learning_rate": 4.0047103386123777e-07, "loss": 0.1263, "step": 1869 }, { "epoch": 1.7444029850746268, "grad_norm": 0.6361273579368828, "learning_rate": 3.9760297280897533e-07, "loss": 0.1043, "step": 1870 }, { "epoch": 1.7453358208955225, "grad_norm": 0.6515822514574073, "learning_rate": 3.9474479351407803e-07, "loss": 0.126, "step": 1871 }, { "epoch": 1.7462686567164178, "grad_norm": 0.633917292461442, "learning_rate": 3.918965021133131e-07, "loss": 0.1052, "step": 1872 }, { "epoch": 1.7472014925373134, "grad_norm": 0.6375921160308542, "learning_rate": 3.8905810472221636e-07, "loss": 0.083, "step": 1873 }, { "epoch": 1.748134328358209, "grad_norm": 0.6738619864257229, "learning_rate": 3.8622960743508074e-07, "loss": 0.11, "step": 1874 }, { "epoch": 1.7490671641791045, "grad_norm": 0.6747159809866141, "learning_rate": 3.834110163249416e-07, "loss": 0.105, "step": 1875 }, { "epoch": 1.75, "grad_norm": 0.5874881933144485, "learning_rate": 3.8060233744356634e-07, "loss": 0.077, "step": 1876 }, { "epoch": 1.7509328358208955, "grad_norm": 0.6792413636516135, "learning_rate": 3.7780357682143943e-07, "loss": 0.1189, "step": 1877 }, { "epoch": 1.751865671641791, "grad_norm": 0.6664140576981115, "learning_rate": 3.75014740467749e-07, "loss": 0.1104, "step": 1878 }, { "epoch": 1.7527985074626866, "grad_norm": 0.6697409735909573, "learning_rate": 3.72235834370378e-07, "loss": 0.1153, "step": 1879 }, { "epoch": 1.7537313432835822, "grad_norm": 0.6823356036188537, "learning_rate": 3.6946686449588267e-07, "loss": 0.1353, "step": 1880 }, { "epoch": 1.7546641791044775, "grad_norm": 0.644480793052808, "learning_rate": 3.667078367894905e-07, "loss": 0.0991, "step": 1881 }, { "epoch": 1.7555970149253732, "grad_norm": 0.64904240140218, "learning_rate": 3.639587571750802e-07, "loss": 0.1176, "step": 1882 }, { "epoch": 1.7565298507462686, "grad_norm": 0.6717123931625297, "learning_rate": 3.612196315551719e-07, "loss": 0.1117, "step": 1883 }, { "epoch": 1.7574626865671643, "grad_norm": 0.6691894909959042, "learning_rate": 3.584904658109106e-07, "loss": 0.0969, "step": 1884 }, { "epoch": 1.7583955223880596, "grad_norm": 0.6452762757260058, "learning_rate": 3.557712658020607e-07, "loss": 0.0797, "step": 1885 }, { "epoch": 1.7593283582089554, "grad_norm": 0.6135575305264562, "learning_rate": 3.5306203736698686e-07, "loss": 0.0772, "step": 1886 }, { "epoch": 1.7602611940298507, "grad_norm": 0.6115727755005295, "learning_rate": 3.503627863226455e-07, "loss": 0.0828, "step": 1887 }, { "epoch": 1.7611940298507462, "grad_norm": 0.723740813214594, "learning_rate": 3.4767351846456744e-07, "loss": 0.0996, "step": 1888 }, { "epoch": 1.7621268656716418, "grad_norm": 0.6585057909923171, "learning_rate": 3.4499423956685207e-07, "loss": 0.135, "step": 1889 }, { "epoch": 1.7630597014925373, "grad_norm": 0.6981628859312276, "learning_rate": 3.423249553821506e-07, "loss": 0.1226, "step": 1890 }, { "epoch": 1.7639925373134329, "grad_norm": 0.628988892724146, "learning_rate": 3.3966567164165466e-07, "loss": 0.0946, "step": 1891 }, { "epoch": 1.7649253731343284, "grad_norm": 0.6250625672762206, "learning_rate": 3.37016394055083e-07, "loss": 0.0992, "step": 1892 }, { "epoch": 1.765858208955224, "grad_norm": 0.6192996496003866, "learning_rate": 3.343771283106728e-07, "loss": 0.1051, "step": 1893 }, { "epoch": 1.7667910447761193, "grad_norm": 0.6130888845332841, "learning_rate": 3.3174788007516166e-07, "loss": 0.0857, "step": 1894 }, { "epoch": 1.767723880597015, "grad_norm": 0.6337520133972055, "learning_rate": 3.2912865499378053e-07, "loss": 0.1044, "step": 1895 }, { "epoch": 1.7686567164179103, "grad_norm": 0.7363876371619529, "learning_rate": 3.2651945869024035e-07, "loss": 0.1556, "step": 1896 }, { "epoch": 1.769589552238806, "grad_norm": 0.6970958381459849, "learning_rate": 3.239202967667182e-07, "loss": 0.1319, "step": 1897 }, { "epoch": 1.7705223880597014, "grad_norm": 0.6474165784677441, "learning_rate": 3.2133117480384613e-07, "loss": 0.099, "step": 1898 }, { "epoch": 1.7714552238805972, "grad_norm": 0.6332817634335476, "learning_rate": 3.187520983607012e-07, "loss": 0.0965, "step": 1899 }, { "epoch": 1.7723880597014925, "grad_norm": 0.6454683352926563, "learning_rate": 3.1618307297479055e-07, "loss": 0.1037, "step": 1900 }, { "epoch": 1.773320895522388, "grad_norm": 0.6315913263807102, "learning_rate": 3.1362410416204024e-07, "loss": 0.0911, "step": 1901 }, { "epoch": 1.7742537313432836, "grad_norm": 0.6782763796377491, "learning_rate": 3.1107519741678526e-07, "loss": 0.1114, "step": 1902 }, { "epoch": 1.775186567164179, "grad_norm": 0.6396961604271327, "learning_rate": 3.0853635821175676e-07, "loss": 0.1067, "step": 1903 }, { "epoch": 1.7761194029850746, "grad_norm": 0.667567703264931, "learning_rate": 3.0600759199806815e-07, "loss": 0.1122, "step": 1904 }, { "epoch": 1.7770522388059702, "grad_norm": 0.6581632534606024, "learning_rate": 3.0348890420520693e-07, "loss": 0.115, "step": 1905 }, { "epoch": 1.7779850746268657, "grad_norm": 0.6686063299418542, "learning_rate": 3.0098030024102107e-07, "loss": 0.1106, "step": 1906 }, { "epoch": 1.778917910447761, "grad_norm": 0.6387221659887993, "learning_rate": 2.9848178549170604e-07, "loss": 0.0953, "step": 1907 }, { "epoch": 1.7798507462686568, "grad_norm": 0.6119068156100763, "learning_rate": 2.959933653217967e-07, "loss": 0.0783, "step": 1908 }, { "epoch": 1.7807835820895521, "grad_norm": 0.6174788344069452, "learning_rate": 2.9351504507415305e-07, "loss": 0.099, "step": 1909 }, { "epoch": 1.7817164179104479, "grad_norm": 0.6627046987799924, "learning_rate": 2.9104683006995147e-07, "loss": 0.1161, "step": 1910 }, { "epoch": 1.7826492537313432, "grad_norm": 0.6397162039467906, "learning_rate": 2.885887256086678e-07, "loss": 0.0981, "step": 1911 }, { "epoch": 1.783582089552239, "grad_norm": 0.6512833837271518, "learning_rate": 2.8614073696807297e-07, "loss": 0.1016, "step": 1912 }, { "epoch": 1.7845149253731343, "grad_norm": 0.6771120472210902, "learning_rate": 2.837028694042182e-07, "loss": 0.1076, "step": 1913 }, { "epoch": 1.7854477611940298, "grad_norm": 0.6144455394318806, "learning_rate": 2.812751281514203e-07, "loss": 0.092, "step": 1914 }, { "epoch": 1.7863805970149254, "grad_norm": 0.6421925283663953, "learning_rate": 2.7885751842225804e-07, "loss": 0.0995, "step": 1915 }, { "epoch": 1.787313432835821, "grad_norm": 0.6788399169247629, "learning_rate": 2.7645004540755527e-07, "loss": 0.1025, "step": 1916 }, { "epoch": 1.7882462686567164, "grad_norm": 0.6859479097073858, "learning_rate": 2.74052714276371e-07, "loss": 0.1334, "step": 1917 }, { "epoch": 1.789179104477612, "grad_norm": 0.6221819207261343, "learning_rate": 2.716655301759902e-07, "loss": 0.0818, "step": 1918 }, { "epoch": 1.7901119402985075, "grad_norm": 0.7296422054416469, "learning_rate": 2.6928849823190995e-07, "loss": 0.1436, "step": 1919 }, { "epoch": 1.7910447761194028, "grad_norm": 0.6436014875889204, "learning_rate": 2.669216235478295e-07, "loss": 0.1048, "step": 1920 }, { "epoch": 1.7919776119402986, "grad_norm": 0.677694195076697, "learning_rate": 2.6456491120564034e-07, "loss": 0.1037, "step": 1921 }, { "epoch": 1.792910447761194, "grad_norm": 0.6842535395642458, "learning_rate": 2.622183662654143e-07, "loss": 0.1135, "step": 1922 }, { "epoch": 1.7938432835820897, "grad_norm": 0.6424444456806488, "learning_rate": 2.59881993765394e-07, "loss": 0.0993, "step": 1923 }, { "epoch": 1.794776119402985, "grad_norm": 0.6915164949555478, "learning_rate": 2.575557987219784e-07, "loss": 0.1243, "step": 1924 }, { "epoch": 1.7957089552238807, "grad_norm": 0.728490009558956, "learning_rate": 2.5523978612971623e-07, "loss": 0.1196, "step": 1925 }, { "epoch": 1.796641791044776, "grad_norm": 0.6578069703487425, "learning_rate": 2.529339609612941e-07, "loss": 0.0994, "step": 1926 }, { "epoch": 1.7975746268656716, "grad_norm": 0.6608008624484439, "learning_rate": 2.506383281675229e-07, "loss": 0.098, "step": 1927 }, { "epoch": 1.7985074626865671, "grad_norm": 0.673412270885401, "learning_rate": 2.4835289267733263e-07, "loss": 0.1073, "step": 1928 }, { "epoch": 1.7994402985074627, "grad_norm": 0.6640229140429172, "learning_rate": 2.4607765939775706e-07, "loss": 0.1215, "step": 1929 }, { "epoch": 1.8003731343283582, "grad_norm": 0.6118928122911703, "learning_rate": 2.4381263321392514e-07, "loss": 0.0891, "step": 1930 }, { "epoch": 1.8013059701492538, "grad_norm": 0.6454495710068006, "learning_rate": 2.415578189890505e-07, "loss": 0.1027, "step": 1931 }, { "epoch": 1.8022388059701493, "grad_norm": 0.6840339959639248, "learning_rate": 2.3931322156442117e-07, "loss": 0.1064, "step": 1932 }, { "epoch": 1.8031716417910446, "grad_norm": 0.6092848641949081, "learning_rate": 2.3707884575938645e-07, "loss": 0.0945, "step": 1933 }, { "epoch": 1.8041044776119404, "grad_norm": 0.7138452210905425, "learning_rate": 2.348546963713516e-07, "loss": 0.1602, "step": 1934 }, { "epoch": 1.8050373134328357, "grad_norm": 0.7007579681989229, "learning_rate": 2.3264077817576446e-07, "loss": 0.149, "step": 1935 }, { "epoch": 1.8059701492537314, "grad_norm": 0.6490687321389229, "learning_rate": 2.3043709592610486e-07, "loss": 0.0946, "step": 1936 }, { "epoch": 1.8069029850746268, "grad_norm": 0.6576971632810565, "learning_rate": 2.2824365435387573e-07, "loss": 0.1053, "step": 1937 }, { "epoch": 1.8078358208955225, "grad_norm": 0.6791918723864737, "learning_rate": 2.2606045816859047e-07, "loss": 0.1155, "step": 1938 }, { "epoch": 1.8087686567164178, "grad_norm": 0.5838527860651224, "learning_rate": 2.2388751205776826e-07, "loss": 0.073, "step": 1939 }, { "epoch": 1.8097014925373134, "grad_norm": 0.6203724603414665, "learning_rate": 2.2172482068691658e-07, "loss": 0.0912, "step": 1940 }, { "epoch": 1.810634328358209, "grad_norm": 0.7003419195257842, "learning_rate": 2.1957238869952767e-07, "loss": 0.1355, "step": 1941 }, { "epoch": 1.8115671641791045, "grad_norm": 0.6500146930929165, "learning_rate": 2.174302207170653e-07, "loss": 0.0943, "step": 1942 }, { "epoch": 1.8125, "grad_norm": 0.6644138220899628, "learning_rate": 2.152983213389559e-07, "loss": 0.1193, "step": 1943 }, { "epoch": 1.8134328358208955, "grad_norm": 0.6672956149205306, "learning_rate": 2.1317669514257678e-07, "loss": 0.1016, "step": 1944 }, { "epoch": 1.814365671641791, "grad_norm": 0.6597041915670772, "learning_rate": 2.1106534668324963e-07, "loss": 0.1032, "step": 1945 }, { "epoch": 1.8152985074626866, "grad_norm": 0.6452309527396978, "learning_rate": 2.0896428049422768e-07, "loss": 0.1087, "step": 1946 }, { "epoch": 1.8162313432835822, "grad_norm": 0.6671135063410687, "learning_rate": 2.0687350108668736e-07, "loss": 0.128, "step": 1947 }, { "epoch": 1.8171641791044775, "grad_norm": 0.6461293537967087, "learning_rate": 2.0479301294971943e-07, "loss": 0.1072, "step": 1948 }, { "epoch": 1.8180970149253732, "grad_norm": 0.6718541547223441, "learning_rate": 2.0272282055031677e-07, "loss": 0.1071, "step": 1949 }, { "epoch": 1.8190298507462686, "grad_norm": 0.6650747958218103, "learning_rate": 2.006629283333694e-07, "loss": 0.1127, "step": 1950 }, { "epoch": 1.8199626865671643, "grad_norm": 0.6400174302682149, "learning_rate": 1.986133407216473e-07, "loss": 0.0882, "step": 1951 }, { "epoch": 1.8208955223880596, "grad_norm": 0.6539068163561619, "learning_rate": 1.9657406211579966e-07, "loss": 0.0978, "step": 1952 }, { "epoch": 1.8218283582089554, "grad_norm": 0.6456170079877646, "learning_rate": 1.9454509689433855e-07, "loss": 0.1133, "step": 1953 }, { "epoch": 1.8227611940298507, "grad_norm": 0.6118694345570553, "learning_rate": 1.925264494136342e-07, "loss": 0.1002, "step": 1954 }, { "epoch": 1.8236940298507462, "grad_norm": 0.6711072620188462, "learning_rate": 1.9051812400790294e-07, "loss": 0.1181, "step": 1955 }, { "epoch": 1.8246268656716418, "grad_norm": 0.6794908144025533, "learning_rate": 1.885201249891988e-07, "loss": 0.1109, "step": 1956 }, { "epoch": 1.8255597014925373, "grad_norm": 0.6335100911354345, "learning_rate": 1.8653245664740415e-07, "loss": 0.0836, "step": 1957 }, { "epoch": 1.8264925373134329, "grad_norm": 0.6489719159056584, "learning_rate": 1.8455512325022073e-07, "loss": 0.099, "step": 1958 }, { "epoch": 1.8274253731343284, "grad_norm": 0.7078255488568669, "learning_rate": 1.825881290431586e-07, "loss": 0.136, "step": 1959 }, { "epoch": 1.828358208955224, "grad_norm": 0.7260657979416234, "learning_rate": 1.806314782495311e-07, "loss": 0.1443, "step": 1960 }, { "epoch": 1.8292910447761193, "grad_norm": 0.6394542987180041, "learning_rate": 1.7868517507044158e-07, "loss": 0.0964, "step": 1961 }, { "epoch": 1.830223880597015, "grad_norm": 0.6382889149847597, "learning_rate": 1.7674922368477675e-07, "loss": 0.0947, "step": 1962 }, { "epoch": 1.8311567164179103, "grad_norm": 0.6928037880410998, "learning_rate": 1.7482362824919773e-07, "loss": 0.1158, "step": 1963 }, { "epoch": 1.832089552238806, "grad_norm": 0.6731589903124383, "learning_rate": 1.7290839289813065e-07, "loss": 0.1114, "step": 1964 }, { "epoch": 1.8330223880597014, "grad_norm": 0.669936098582753, "learning_rate": 1.71003521743755e-07, "loss": 0.12, "step": 1965 }, { "epoch": 1.8339552238805972, "grad_norm": 0.697015281302923, "learning_rate": 1.6910901887599917e-07, "loss": 0.1315, "step": 1966 }, { "epoch": 1.8348880597014925, "grad_norm": 0.6512428198530752, "learning_rate": 1.6722488836253104e-07, "loss": 0.1055, "step": 1967 }, { "epoch": 1.835820895522388, "grad_norm": 0.6505231192220438, "learning_rate": 1.6535113424874683e-07, "loss": 0.112, "step": 1968 }, { "epoch": 1.8367537313432836, "grad_norm": 0.7153260084781199, "learning_rate": 1.6348776055776393e-07, "loss": 0.128, "step": 1969 }, { "epoch": 1.837686567164179, "grad_norm": 0.671724704716513, "learning_rate": 1.6163477129041204e-07, "loss": 0.1113, "step": 1970 }, { "epoch": 1.8386194029850746, "grad_norm": 0.626274912048965, "learning_rate": 1.5979217042522477e-07, "loss": 0.081, "step": 1971 }, { "epoch": 1.8395522388059702, "grad_norm": 0.6558679973034266, "learning_rate": 1.5795996191842966e-07, "loss": 0.0958, "step": 1972 }, { "epoch": 1.8404850746268657, "grad_norm": 0.6676984908438178, "learning_rate": 1.561381497039427e-07, "loss": 0.1134, "step": 1973 }, { "epoch": 1.841417910447761, "grad_norm": 0.6150741573517586, "learning_rate": 1.5432673769335772e-07, "loss": 0.0888, "step": 1974 }, { "epoch": 1.8423507462686568, "grad_norm": 0.6463444092892342, "learning_rate": 1.525257297759375e-07, "loss": 0.1091, "step": 1975 }, { "epoch": 1.8432835820895521, "grad_norm": 0.6400968238569482, "learning_rate": 1.5073512981860715e-07, "loss": 0.0968, "step": 1976 }, { "epoch": 1.8442164179104479, "grad_norm": 0.6056028744149194, "learning_rate": 1.4895494166594527e-07, "loss": 0.0972, "step": 1977 }, { "epoch": 1.8451492537313432, "grad_norm": 0.6833997438806855, "learning_rate": 1.4718516914017433e-07, "loss": 0.1268, "step": 1978 }, { "epoch": 1.846082089552239, "grad_norm": 0.6222483092184721, "learning_rate": 1.4542581604115258e-07, "loss": 0.087, "step": 1979 }, { "epoch": 1.8470149253731343, "grad_norm": 0.6342465202044912, "learning_rate": 1.4367688614637e-07, "loss": 0.1092, "step": 1980 }, { "epoch": 1.8479477611940298, "grad_norm": 0.6837587683130787, "learning_rate": 1.4193838321093444e-07, "loss": 0.1244, "step": 1981 }, { "epoch": 1.8488805970149254, "grad_norm": 0.6536814086279699, "learning_rate": 1.4021031096756676e-07, "loss": 0.1067, "step": 1982 }, { "epoch": 1.849813432835821, "grad_norm": 0.6467763487448556, "learning_rate": 1.3849267312659286e-07, "loss": 0.1012, "step": 1983 }, { "epoch": 1.8507462686567164, "grad_norm": 0.6983552942709612, "learning_rate": 1.3678547337593494e-07, "loss": 0.1026, "step": 1984 }, { "epoch": 1.851679104477612, "grad_norm": 0.7049375866363263, "learning_rate": 1.3508871538110257e-07, "loss": 0.1331, "step": 1985 }, { "epoch": 1.8526119402985075, "grad_norm": 0.5860978599466266, "learning_rate": 1.3340240278518657e-07, "loss": 0.0814, "step": 1986 }, { "epoch": 1.8535447761194028, "grad_norm": 0.611619411428092, "learning_rate": 1.317265392088507e-07, "loss": 0.0838, "step": 1987 }, { "epoch": 1.8544776119402986, "grad_norm": 0.6694883595964908, "learning_rate": 1.3006112825032447e-07, "loss": 0.123, "step": 1988 }, { "epoch": 1.855410447761194, "grad_norm": 0.6583527310665073, "learning_rate": 1.284061734853931e-07, "loss": 0.0933, "step": 1989 }, { "epoch": 1.8563432835820897, "grad_norm": 0.6284271033447776, "learning_rate": 1.2676167846739308e-07, "loss": 0.0937, "step": 1990 }, { "epoch": 1.857276119402985, "grad_norm": 0.6884750703040088, "learning_rate": 1.2512764672720168e-07, "loss": 0.1223, "step": 1991 }, { "epoch": 1.8582089552238807, "grad_norm": 0.705208193698195, "learning_rate": 1.235040817732297e-07, "loss": 0.1285, "step": 1992 }, { "epoch": 1.859141791044776, "grad_norm": 0.619892115598376, "learning_rate": 1.2189098709141756e-07, "loss": 0.0934, "step": 1993 }, { "epoch": 1.8600746268656716, "grad_norm": 0.6248699218687971, "learning_rate": 1.202883661452231e-07, "loss": 0.1017, "step": 1994 }, { "epoch": 1.8610074626865671, "grad_norm": 0.7189895967539812, "learning_rate": 1.1869622237561662e-07, "loss": 0.1349, "step": 1995 }, { "epoch": 1.8619402985074627, "grad_norm": 0.6609702686388393, "learning_rate": 1.1711455920107306e-07, "loss": 0.1164, "step": 1996 }, { "epoch": 1.8628731343283582, "grad_norm": 0.6723158839099392, "learning_rate": 1.1554338001756482e-07, "loss": 0.1328, "step": 1997 }, { "epoch": 1.8638059701492538, "grad_norm": 0.6977368687384643, "learning_rate": 1.1398268819855285e-07, "loss": 0.1358, "step": 1998 }, { "epoch": 1.8647388059701493, "grad_norm": 0.7042318927937837, "learning_rate": 1.1243248709498278e-07, "loss": 0.1027, "step": 1999 }, { "epoch": 1.8656716417910446, "grad_norm": 0.6310795250074407, "learning_rate": 1.1089278003527438e-07, "loss": 0.0921, "step": 2000 }, { "epoch": 1.8656716417910446, "eval_loss": 0.16207586228847504, "eval_runtime": 4.1975, "eval_samples_per_second": 20.727, "eval_steps_per_second": 5.241, "step": 2000 }, { "epoch": 1.8666044776119404, "grad_norm": 0.6539423391857291, "learning_rate": 1.0936357032531597e-07, "loss": 0.1176, "step": 2001 }, { "epoch": 1.8675373134328357, "grad_norm": 0.7148328048340282, "learning_rate": 1.0784486124845783e-07, "loss": 0.1376, "step": 2002 }, { "epoch": 1.8684701492537314, "grad_norm": 0.7007972843688426, "learning_rate": 1.0633665606550436e-07, "loss": 0.1369, "step": 2003 }, { "epoch": 1.8694029850746268, "grad_norm": 0.6447722936168525, "learning_rate": 1.0483895801470579e-07, "loss": 0.096, "step": 2004 }, { "epoch": 1.8703358208955225, "grad_norm": 0.6338265933782615, "learning_rate": 1.0335177031175425e-07, "loss": 0.1193, "step": 2005 }, { "epoch": 1.8712686567164178, "grad_norm": 0.6596355593334683, "learning_rate": 1.0187509614977387e-07, "loss": 0.0918, "step": 2006 }, { "epoch": 1.8722014925373134, "grad_norm": 0.6431527765089987, "learning_rate": 1.0040893869931623e-07, "loss": 0.0949, "step": 2007 }, { "epoch": 1.873134328358209, "grad_norm": 0.697613678188239, "learning_rate": 9.89533011083521e-08, "loss": 0.1182, "step": 2008 }, { "epoch": 1.8740671641791045, "grad_norm": 0.5982533024167477, "learning_rate": 9.75081865022659e-08, "loss": 0.0795, "step": 2009 }, { "epoch": 1.875, "grad_norm": 0.6467710320938393, "learning_rate": 9.607359798384785e-08, "loss": 0.0953, "step": 2010 }, { "epoch": 1.8759328358208955, "grad_norm": 0.6609168513756316, "learning_rate": 9.464953863328685e-08, "loss": 0.1185, "step": 2011 }, { "epoch": 1.876865671641791, "grad_norm": 0.6108760267772362, "learning_rate": 9.323601150816597e-08, "loss": 0.0936, "step": 2012 }, { "epoch": 1.8777985074626866, "grad_norm": 0.6930239767824389, "learning_rate": 9.18330196434536e-08, "loss": 0.1074, "step": 2013 }, { "epoch": 1.8787313432835822, "grad_norm": 0.6513930064292505, "learning_rate": 9.044056605149898e-08, "loss": 0.1041, "step": 2014 }, { "epoch": 1.8796641791044775, "grad_norm": 0.6366667417563402, "learning_rate": 8.905865372202449e-08, "loss": 0.103, "step": 2015 }, { "epoch": 1.8805970149253732, "grad_norm": 0.6556929427906142, "learning_rate": 8.768728562211948e-08, "loss": 0.1128, "step": 2016 }, { "epoch": 1.8815298507462686, "grad_norm": 0.6989840540228216, "learning_rate": 8.632646469623251e-08, "loss": 0.1189, "step": 2017 }, { "epoch": 1.8824626865671643, "grad_norm": 0.663395123838571, "learning_rate": 8.497619386616917e-08, "loss": 0.1001, "step": 2018 }, { "epoch": 1.8833955223880596, "grad_norm": 0.6541928766619252, "learning_rate": 8.363647603108038e-08, "loss": 0.1051, "step": 2019 }, { "epoch": 1.8843283582089554, "grad_norm": 0.6858019748614033, "learning_rate": 8.230731406746018e-08, "loss": 0.1219, "step": 2020 }, { "epoch": 1.8852611940298507, "grad_norm": 0.6191987348201776, "learning_rate": 8.098871082913795e-08, "loss": 0.0853, "step": 2021 }, { "epoch": 1.8861940298507462, "grad_norm": 0.6848285549207934, "learning_rate": 7.968066914727346e-08, "loss": 0.1246, "step": 2022 }, { "epoch": 1.8871268656716418, "grad_norm": 0.6347903849201899, "learning_rate": 7.838319183034738e-08, "loss": 0.0891, "step": 2023 }, { "epoch": 1.8880597014925373, "grad_norm": 0.6869885807069066, "learning_rate": 7.709628166416128e-08, "loss": 0.1254, "step": 2024 }, { "epoch": 1.8889925373134329, "grad_norm": 0.6246348964209993, "learning_rate": 7.581994141182436e-08, "loss": 0.0854, "step": 2025 }, { "epoch": 1.8899253731343284, "grad_norm": 0.6478248339737976, "learning_rate": 7.455417381375452e-08, "loss": 0.1049, "step": 2026 }, { "epoch": 1.890858208955224, "grad_norm": 0.6900269513139395, "learning_rate": 7.329898158766668e-08, "loss": 0.1309, "step": 2027 }, { "epoch": 1.8917910447761193, "grad_norm": 0.6670718565977884, "learning_rate": 7.20543674285712e-08, "loss": 0.0964, "step": 2028 }, { "epoch": 1.892723880597015, "grad_norm": 0.6802322397575383, "learning_rate": 7.082033400876597e-08, "loss": 0.0988, "step": 2029 }, { "epoch": 1.8936567164179103, "grad_norm": 0.6821001814652339, "learning_rate": 6.959688397783104e-08, "loss": 0.1228, "step": 2030 }, { "epoch": 1.894589552238806, "grad_norm": 0.5821053052437037, "learning_rate": 6.838401996262289e-08, "loss": 0.0754, "step": 2031 }, { "epoch": 1.8955223880597014, "grad_norm": 0.6720760678504645, "learning_rate": 6.718174456726789e-08, "loss": 0.118, "step": 2032 }, { "epoch": 1.8964552238805972, "grad_norm": 0.6751217599741611, "learning_rate": 6.599006037315891e-08, "loss": 0.0947, "step": 2033 }, { "epoch": 1.8973880597014925, "grad_norm": 0.6924774846463455, "learning_rate": 6.480896993894925e-08, "loss": 0.1301, "step": 2034 }, { "epoch": 1.898320895522388, "grad_norm": 0.675589733825261, "learning_rate": 6.363847580054483e-08, "loss": 0.0994, "step": 2035 }, { "epoch": 1.8992537313432836, "grad_norm": 0.6604723911002577, "learning_rate": 6.247858047110145e-08, "loss": 0.1007, "step": 2036 }, { "epoch": 1.900186567164179, "grad_norm": 0.6617349846313897, "learning_rate": 6.13292864410181e-08, "loss": 0.114, "step": 2037 }, { "epoch": 1.9011194029850746, "grad_norm": 0.6617834009195274, "learning_rate": 6.019059617793088e-08, "loss": 0.1074, "step": 2038 }, { "epoch": 1.9020522388059702, "grad_norm": 0.6406198610296271, "learning_rate": 5.906251212670966e-08, "loss": 0.102, "step": 2039 }, { "epoch": 1.9029850746268657, "grad_norm": 0.6473668526265406, "learning_rate": 5.794503670945195e-08, "loss": 0.1073, "step": 2040 }, { "epoch": 1.903917910447761, "grad_norm": 0.6402229372124031, "learning_rate": 5.683817232547739e-08, "loss": 0.1131, "step": 2041 }, { "epoch": 1.9048507462686568, "grad_norm": 0.7060017353397369, "learning_rate": 5.5741921351322726e-08, "loss": 0.1332, "step": 2042 }, { "epoch": 1.9057835820895521, "grad_norm": 0.6518292368932501, "learning_rate": 5.465628614073626e-08, "loss": 0.1004, "step": 2043 }, { "epoch": 1.9067164179104479, "grad_norm": 0.6328917766729254, "learning_rate": 5.3581269024673975e-08, "loss": 0.097, "step": 2044 }, { "epoch": 1.9076492537313432, "grad_norm": 0.658490226314769, "learning_rate": 5.251687231129288e-08, "loss": 0.104, "step": 2045 }, { "epoch": 1.908582089552239, "grad_norm": 0.6569997998555758, "learning_rate": 5.1463098285948755e-08, "loss": 0.1032, "step": 2046 }, { "epoch": 1.9095149253731343, "grad_norm": 0.7392325163741427, "learning_rate": 5.0419949211188426e-08, "loss": 0.1559, "step": 2047 }, { "epoch": 1.9104477611940298, "grad_norm": 0.655828122155578, "learning_rate": 4.9387427326745287e-08, "loss": 0.1112, "step": 2048 }, { "epoch": 1.9113805970149254, "grad_norm": 0.640666718627407, "learning_rate": 4.8365534849536546e-08, "loss": 0.0901, "step": 2049 }, { "epoch": 1.912313432835821, "grad_norm": 0.6260646589561353, "learning_rate": 4.7354273973657106e-08, "loss": 0.0926, "step": 2050 }, { "epoch": 1.9132462686567164, "grad_norm": 0.6333783246249814, "learning_rate": 4.635364687037347e-08, "loss": 0.1001, "step": 2051 }, { "epoch": 1.914179104477612, "grad_norm": 0.6634418075187699, "learning_rate": 4.536365568812206e-08, "loss": 0.1179, "step": 2052 }, { "epoch": 1.9151119402985075, "grad_norm": 0.6682040173042607, "learning_rate": 4.438430255250148e-08, "loss": 0.1052, "step": 2053 }, { "epoch": 1.9160447761194028, "grad_norm": 0.6410437589403774, "learning_rate": 4.3415589566271345e-08, "loss": 0.0968, "step": 2054 }, { "epoch": 1.9169776119402986, "grad_norm": 0.5976717721228688, "learning_rate": 4.245751880934401e-08, "loss": 0.0817, "step": 2055 }, { "epoch": 1.917910447761194, "grad_norm": 0.6372006556468219, "learning_rate": 4.1510092338784005e-08, "loss": 0.1038, "step": 2056 }, { "epoch": 1.9188432835820897, "grad_norm": 0.70336400727288, "learning_rate": 4.057331218880023e-08, "loss": 0.125, "step": 2057 }, { "epoch": 1.919776119402985, "grad_norm": 0.6408161707633523, "learning_rate": 3.9647180370742664e-08, "loss": 0.0927, "step": 2058 }, { "epoch": 1.9207089552238807, "grad_norm": 0.6451706851346851, "learning_rate": 3.8731698873099025e-08, "loss": 0.104, "step": 2059 }, { "epoch": 1.921641791044776, "grad_norm": 0.6721570063691603, "learning_rate": 3.782686966149085e-08, "loss": 0.1032, "step": 2060 }, { "epoch": 1.9225746268656716, "grad_norm": 0.6724225695947336, "learning_rate": 3.6932694678666335e-08, "loss": 0.1125, "step": 2061 }, { "epoch": 1.9235074626865671, "grad_norm": 0.6666695535822613, "learning_rate": 3.604917584449919e-08, "loss": 0.0967, "step": 2062 }, { "epoch": 1.9244402985074627, "grad_norm": 0.6256638870780018, "learning_rate": 3.5176315055983625e-08, "loss": 0.0997, "step": 2063 }, { "epoch": 1.9253731343283582, "grad_norm": 0.631995533057246, "learning_rate": 3.431411418722941e-08, "loss": 0.0891, "step": 2064 }, { "epoch": 1.9263059701492538, "grad_norm": 0.6923413473109534, "learning_rate": 3.346257508945849e-08, "loss": 0.1214, "step": 2065 }, { "epoch": 1.9272388059701493, "grad_norm": 0.6345461480327402, "learning_rate": 3.26216995910017e-08, "loss": 0.1041, "step": 2066 }, { "epoch": 1.9281716417910446, "grad_norm": 0.6532536090951201, "learning_rate": 3.1791489497293715e-08, "loss": 0.1004, "step": 2067 }, { "epoch": 1.9291044776119404, "grad_norm": 0.778969248354311, "learning_rate": 3.097194659086977e-08, "loss": 0.1753, "step": 2068 }, { "epoch": 1.9300373134328357, "grad_norm": 0.6528246517484569, "learning_rate": 3.016307263136231e-08, "loss": 0.1127, "step": 2069 }, { "epoch": 1.9309701492537314, "grad_norm": 0.6449704628935081, "learning_rate": 2.9364869355494874e-08, "loss": 0.1023, "step": 2070 }, { "epoch": 1.9319029850746268, "grad_norm": 0.6663004646111129, "learning_rate": 2.857733847708155e-08, "loss": 0.1103, "step": 2071 }, { "epoch": 1.9328358208955225, "grad_norm": 0.682419168798923, "learning_rate": 2.7800481687021987e-08, "loss": 0.121, "step": 2072 }, { "epoch": 1.9337686567164178, "grad_norm": 0.6572007387105682, "learning_rate": 2.7034300653295818e-08, "loss": 0.1143, "step": 2073 }, { "epoch": 1.9347014925373134, "grad_norm": 0.6132975004212197, "learning_rate": 2.6278797020963253e-08, "loss": 0.086, "step": 2074 }, { "epoch": 1.935634328358209, "grad_norm": 0.7039217012455562, "learning_rate": 2.5533972412157825e-08, "loss": 0.1381, "step": 2075 }, { "epoch": 1.9365671641791045, "grad_norm": 0.6650293998264659, "learning_rate": 2.479982842608475e-08, "loss": 0.1064, "step": 2076 }, { "epoch": 1.9375, "grad_norm": 0.6526730608104929, "learning_rate": 2.4076366639015914e-08, "loss": 0.1105, "step": 2077 }, { "epoch": 1.9384328358208955, "grad_norm": 0.6682531800317112, "learning_rate": 2.3363588604288777e-08, "loss": 0.0967, "step": 2078 }, { "epoch": 1.939365671641791, "grad_norm": 0.698605698168699, "learning_rate": 2.2661495852301376e-08, "loss": 0.1147, "step": 2079 }, { "epoch": 1.9402985074626866, "grad_norm": 0.6342166834653444, "learning_rate": 2.1970089890509527e-08, "loss": 0.0941, "step": 2080 }, { "epoch": 1.9412313432835822, "grad_norm": 0.6791716945345843, "learning_rate": 2.128937220342353e-08, "loss": 0.1146, "step": 2081 }, { "epoch": 1.9421641791044775, "grad_norm": 0.6314957833539162, "learning_rate": 2.0619344252605922e-08, "loss": 0.0936, "step": 2082 }, { "epoch": 1.9430970149253732, "grad_norm": 0.6840740246825007, "learning_rate": 1.9960007476665376e-08, "loss": 0.1256, "step": 2083 }, { "epoch": 1.9440298507462686, "grad_norm": 0.6724121988593076, "learning_rate": 1.931136329125727e-08, "loss": 0.1205, "step": 2084 }, { "epoch": 1.9449626865671643, "grad_norm": 0.6123065524787217, "learning_rate": 1.8673413089078108e-08, "loss": 0.0859, "step": 2085 }, { "epoch": 1.9458955223880596, "grad_norm": 0.6417150362281923, "learning_rate": 1.8046158239864996e-08, "loss": 0.1162, "step": 2086 }, { "epoch": 1.9468283582089554, "grad_norm": 0.7227302240839547, "learning_rate": 1.7429600090388966e-08, "loss": 0.1427, "step": 2087 }, { "epoch": 1.9477611940298507, "grad_norm": 0.6721416003648963, "learning_rate": 1.6823739964456078e-08, "loss": 0.1011, "step": 2088 }, { "epoch": 1.9486940298507462, "grad_norm": 0.6552662276361532, "learning_rate": 1.622857916290188e-08, "loss": 0.1225, "step": 2089 }, { "epoch": 1.9496268656716418, "grad_norm": 0.6476859192148557, "learning_rate": 1.5644118963590305e-08, "loss": 0.1001, "step": 2090 }, { "epoch": 1.9505597014925373, "grad_norm": 0.6705381313637332, "learning_rate": 1.5070360621408653e-08, "loss": 0.1064, "step": 2091 }, { "epoch": 1.9514925373134329, "grad_norm": 0.6594775269424378, "learning_rate": 1.4507305368268166e-08, "loss": 0.1063, "step": 2092 }, { "epoch": 1.9524253731343284, "grad_norm": 0.6178696968496611, "learning_rate": 1.395495441309791e-08, "loss": 0.1008, "step": 2093 }, { "epoch": 1.953358208955224, "grad_norm": 0.6019227937300999, "learning_rate": 1.3413308941845338e-08, "loss": 0.0817, "step": 2094 }, { "epoch": 1.9542910447761193, "grad_norm": 0.6259306586181282, "learning_rate": 1.2882370117471843e-08, "loss": 0.0878, "step": 2095 }, { "epoch": 1.955223880597015, "grad_norm": 0.7064025874033598, "learning_rate": 1.2362139079949431e-08, "loss": 0.1386, "step": 2096 }, { "epoch": 1.9561567164179103, "grad_norm": 0.6833892721719546, "learning_rate": 1.185261694626183e-08, "loss": 0.1227, "step": 2097 }, { "epoch": 1.957089552238806, "grad_norm": 0.7426689923467742, "learning_rate": 1.1353804810397828e-08, "loss": 0.152, "step": 2098 }, { "epoch": 1.9580223880597014, "grad_norm": 0.6880702120470592, "learning_rate": 1.086570374335183e-08, "loss": 0.1278, "step": 2099 }, { "epoch": 1.9589552238805972, "grad_norm": 0.6268433721929301, "learning_rate": 1.038831479311997e-08, "loss": 0.0831, "step": 2100 }, { "epoch": 1.9598880597014925, "grad_norm": 0.684019894537996, "learning_rate": 9.92163898470011e-09, "loss": 0.119, "step": 2101 }, { "epoch": 1.960820895522388, "grad_norm": 0.7805886111049625, "learning_rate": 9.465677320085742e-09, "loss": 0.1524, "step": 2102 }, { "epoch": 1.9617537313432836, "grad_norm": 0.6611433161279398, "learning_rate": 9.020430778267642e-09, "loss": 0.1011, "step": 2103 }, { "epoch": 1.962686567164179, "grad_norm": 0.6410149024197487, "learning_rate": 8.585900315229434e-09, "loss": 0.1068, "step": 2104 }, { "epoch": 1.9636194029850746, "grad_norm": 0.6737177122476619, "learning_rate": 8.162086863948149e-09, "loss": 0.1201, "step": 2105 }, { "epoch": 1.9645522388059702, "grad_norm": 0.6243268206055856, "learning_rate": 7.748991334387557e-09, "loss": 0.1063, "step": 2106 }, { "epoch": 1.9654850746268657, "grad_norm": 0.6911257932996436, "learning_rate": 7.346614613501502e-09, "loss": 0.1077, "step": 2107 }, { "epoch": 1.966417910447761, "grad_norm": 0.6179484045295606, "learning_rate": 6.9549575652289036e-09, "loss": 0.0895, "step": 2108 }, { "epoch": 1.9673507462686568, "grad_norm": 0.6443713345474682, "learning_rate": 6.57402103049154e-09, "loss": 0.0949, "step": 2109 }, { "epoch": 1.9682835820895521, "grad_norm": 0.6556125240743547, "learning_rate": 6.203805827195153e-09, "loss": 0.126, "step": 2110 }, { "epoch": 1.9692164179104479, "grad_norm": 0.6496230744971924, "learning_rate": 5.844312750224457e-09, "loss": 0.0952, "step": 2111 }, { "epoch": 1.9701492537313432, "grad_norm": 0.6554677997685712, "learning_rate": 5.495542571443135e-09, "loss": 0.1055, "step": 2112 }, { "epoch": 1.971082089552239, "grad_norm": 0.6399300644339321, "learning_rate": 5.157496039691623e-09, "loss": 0.1016, "step": 2113 }, { "epoch": 1.9720149253731343, "grad_norm": 0.7399622976955944, "learning_rate": 4.830173880785993e-09, "loss": 0.1273, "step": 2114 }, { "epoch": 1.9729477611940298, "grad_norm": 0.6538942684816098, "learning_rate": 4.51357679751685e-09, "loss": 0.082, "step": 2115 }, { "epoch": 1.9738805970149254, "grad_norm": 0.6241828922707672, "learning_rate": 4.207705469645995e-09, "loss": 0.0836, "step": 2116 }, { "epoch": 1.974813432835821, "grad_norm": 0.661794610288549, "learning_rate": 3.9125605539064305e-09, "loss": 0.0994, "step": 2117 }, { "epoch": 1.9757462686567164, "grad_norm": 0.6541045587949816, "learning_rate": 3.6281426840006907e-09, "loss": 0.0949, "step": 2118 }, { "epoch": 1.976679104477612, "grad_norm": 0.6597353719884125, "learning_rate": 3.354452470599179e-09, "loss": 0.1049, "step": 2119 }, { "epoch": 1.9776119402985075, "grad_norm": 0.68074145451284, "learning_rate": 3.0914905013396113e-09, "loss": 0.117, "step": 2120 }, { "epoch": 1.9785447761194028, "grad_norm": 0.7335183322471679, "learning_rate": 2.8392573408242418e-09, "loss": 0.15, "step": 2121 }, { "epoch": 1.9794776119402986, "grad_norm": 0.6489277332482255, "learning_rate": 2.597753530620417e-09, "loss": 0.1096, "step": 2122 }, { "epoch": 1.980410447761194, "grad_norm": 0.6423975884961857, "learning_rate": 2.3669795892589108e-09, "loss": 0.1047, "step": 2123 }, { "epoch": 1.9813432835820897, "grad_norm": 0.6128716410938589, "learning_rate": 2.146936012231704e-09, "loss": 0.0834, "step": 2124 }, { "epoch": 1.982276119402985, "grad_norm": 0.6449365473948554, "learning_rate": 1.937623271991429e-09, "loss": 0.0759, "step": 2125 }, { "epoch": 1.9832089552238807, "grad_norm": 0.703859909386184, "learning_rate": 1.7390418179519253e-09, "loss": 0.1538, "step": 2126 }, { "epoch": 1.984141791044776, "grad_norm": 0.64399910783831, "learning_rate": 1.5511920764849087e-09, "loss": 0.0838, "step": 2127 }, { "epoch": 1.9850746268656716, "grad_norm": 0.6198634340631743, "learning_rate": 1.3740744509205263e-09, "loss": 0.0829, "step": 2128 }, { "epoch": 1.9860074626865671, "grad_norm": 0.6909698689011032, "learning_rate": 1.2076893215462459e-09, "loss": 0.1268, "step": 2129 }, { "epoch": 1.9869402985074627, "grad_norm": 0.7256981150451711, "learning_rate": 1.0520370456063023e-09, "loss": 0.1312, "step": 2130 }, { "epoch": 1.9878731343283582, "grad_norm": 0.6368783865813112, "learning_rate": 9.071179572989198e-10, "loss": 0.1034, "step": 2131 }, { "epoch": 1.9888059701492538, "grad_norm": 0.6809483660140083, "learning_rate": 7.72932367779089e-10, "loss": 0.0973, "step": 2132 }, { "epoch": 1.9897388059701493, "grad_norm": 0.6581603749611615, "learning_rate": 6.494805651557911e-10, "loss": 0.1047, "step": 2133 }, { "epoch": 1.9906716417910446, "grad_norm": 0.6226287375116897, "learning_rate": 5.367628144897774e-10, "loss": 0.0914, "step": 2134 }, { "epoch": 1.9916044776119404, "grad_norm": 0.7162061150622722, "learning_rate": 4.3477935779689953e-10, "loss": 0.1411, "step": 2135 }, { "epoch": 1.9925373134328357, "grad_norm": 0.7063946868206198, "learning_rate": 3.4353041404477926e-10, "loss": 0.1387, "step": 2136 }, { "epoch": 1.9934701492537314, "grad_norm": 0.6503986424287748, "learning_rate": 2.630161791528085e-10, "loss": 0.1025, "step": 2137 }, { "epoch": 1.9944029850746268, "grad_norm": 0.6426527552142414, "learning_rate": 1.932368259921491e-10, "loss": 0.1138, "step": 2138 }, { "epoch": 1.9953358208955225, "grad_norm": 0.6192888916071833, "learning_rate": 1.3419250438517771e-10, "loss": 0.106, "step": 2139 }, { "epoch": 1.9962686567164178, "grad_norm": 0.6748102034340371, "learning_rate": 8.588334110604113e-11, "loss": 0.1228, "step": 2140 }, { "epoch": 1.9972014925373134, "grad_norm": 0.6363918086164735, "learning_rate": 4.830943987843562e-11, "loss": 0.0969, "step": 2141 }, { "epoch": 1.998134328358209, "grad_norm": 0.6022951378212655, "learning_rate": 2.1470881376162157e-11, "loss": 0.0835, "step": 2142 }, { "epoch": 1.9990671641791045, "grad_norm": 0.6667173578578698, "learning_rate": 5.367723225346844e-12, "loss": 0.1058, "step": 2143 }, { "epoch": 2.0, "grad_norm": 0.5994425644314461, "learning_rate": 0.0, "loss": 0.0825, "step": 2144 }, { "epoch": 2.0, "step": 2144, "total_flos": 61077656174592.0, "train_loss": 0.13696018441705338, "train_runtime": 2226.2051, "train_samples_per_second": 7.699, "train_steps_per_second": 0.963 } ], "logging_steps": 1, "max_steps": 2144, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 61077656174592.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }