| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 1142, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0017513134851138354, |
| "grad_norm": 1.6189321527508724, |
| "learning_rate": 9.99998108060379e-06, |
| "loss": 0.2516, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0035026269702276708, |
| "grad_norm": 3.2205596273078165, |
| "learning_rate": 9.999924322558328e-06, |
| "loss": 0.4272, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.005253940455341506, |
| "grad_norm": 1.9512359773972658, |
| "learning_rate": 9.99982972629315e-06, |
| "loss": 0.3537, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0070052539404553416, |
| "grad_norm": 2.303082988314436, |
| "learning_rate": 9.99969729252414e-06, |
| "loss": 0.4041, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.008756567425569177, |
| "grad_norm": 1.7602753076895619, |
| "learning_rate": 9.999527022253521e-06, |
| "loss": 0.282, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.010507880910683012, |
| "grad_norm": 1.472660482986906, |
| "learning_rate": 9.999318916769858e-06, |
| "loss": 0.3178, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.012259194395796848, |
| "grad_norm": 1.5120175005465801, |
| "learning_rate": 9.999072977648042e-06, |
| "loss": 0.2982, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.014010507880910683, |
| "grad_norm": 1.552294241363048, |
| "learning_rate": 9.998789206749284e-06, |
| "loss": 0.2406, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.01576182136602452, |
| "grad_norm": 1.4355383392258896, |
| "learning_rate": 9.998467606221091e-06, |
| "loss": 0.3063, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.017513134851138354, |
| "grad_norm": 1.1505639079486283, |
| "learning_rate": 9.998108178497259e-06, |
| "loss": 0.2002, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.01926444833625219, |
| "grad_norm": 1.478144193504059, |
| "learning_rate": 9.99771092629785e-06, |
| "loss": 0.249, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.021015761821366025, |
| "grad_norm": 1.2422830596341097, |
| "learning_rate": 9.997275852629172e-06, |
| "loss": 0.2275, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.02276707530647986, |
| "grad_norm": 1.176007388166311, |
| "learning_rate": 9.99680296078376e-06, |
| "loss": 0.1875, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.024518388791593695, |
| "grad_norm": 1.5860889058713947, |
| "learning_rate": 9.996292254340342e-06, |
| "loss": 0.2827, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.02626970227670753, |
| "grad_norm": 1.708745706115137, |
| "learning_rate": 9.995743737163823e-06, |
| "loss": 0.2909, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.028021015761821366, |
| "grad_norm": 1.8765102069789172, |
| "learning_rate": 9.99515741340525e-06, |
| "loss": 0.1961, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.0297723292469352, |
| "grad_norm": 1.4334195277716413, |
| "learning_rate": 9.994533287501775e-06, |
| "loss": 0.2652, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.03152364273204904, |
| "grad_norm": 1.140465695934814, |
| "learning_rate": 9.993871364176637e-06, |
| "loss": 0.2149, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.03327495621716287, |
| "grad_norm": 1.1401794918473909, |
| "learning_rate": 9.993171648439109e-06, |
| "loss": 0.1913, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.03502626970227671, |
| "grad_norm": 1.3063665613307132, |
| "learning_rate": 9.992434145584471e-06, |
| "loss": 0.2375, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.03677758318739054, |
| "grad_norm": 1.1007379852192687, |
| "learning_rate": 9.991658861193966e-06, |
| "loss": 0.2276, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.03852889667250438, |
| "grad_norm": 1.3635848002372042, |
| "learning_rate": 9.99084580113476e-06, |
| "loss": 0.2432, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.040280210157618214, |
| "grad_norm": 1.437328015329854, |
| "learning_rate": 9.989994971559897e-06, |
| "loss": 0.2815, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.04203152364273205, |
| "grad_norm": 1.2255764233144246, |
| "learning_rate": 9.989106378908246e-06, |
| "loss": 0.2028, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.043782837127845885, |
| "grad_norm": 0.934261492585796, |
| "learning_rate": 9.988180029904463e-06, |
| "loss": 0.1948, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.04553415061295972, |
| "grad_norm": 1.3236962758603368, |
| "learning_rate": 9.987215931558935e-06, |
| "loss": 0.2235, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.047285464098073555, |
| "grad_norm": 1.389095598869151, |
| "learning_rate": 9.986214091167726e-06, |
| "loss": 0.2542, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.04903677758318739, |
| "grad_norm": 1.2087489144763743, |
| "learning_rate": 9.985174516312519e-06, |
| "loss": 0.2045, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.050788091068301226, |
| "grad_norm": 1.0708229969119525, |
| "learning_rate": 9.984097214860566e-06, |
| "loss": 0.2343, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.05253940455341506, |
| "grad_norm": 1.0781105753548819, |
| "learning_rate": 9.982982194964626e-06, |
| "loss": 0.1994, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0542907180385289, |
| "grad_norm": 1.2715702567474565, |
| "learning_rate": 9.981829465062898e-06, |
| "loss": 0.2553, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.05604203152364273, |
| "grad_norm": 0.9520552503997382, |
| "learning_rate": 9.980639033878965e-06, |
| "loss": 0.1844, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.05779334500875657, |
| "grad_norm": 1.2877099584096807, |
| "learning_rate": 9.979410910421724e-06, |
| "loss": 0.232, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.0595446584938704, |
| "grad_norm": 1.364716904790534, |
| "learning_rate": 9.978145103985315e-06, |
| "loss": 0.1983, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.06129597197898424, |
| "grad_norm": 1.0906698260991894, |
| "learning_rate": 9.976841624149054e-06, |
| "loss": 0.1903, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.06304728546409807, |
| "grad_norm": 1.09412669488779, |
| "learning_rate": 9.975500480777364e-06, |
| "loss": 0.1919, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.0647985989492119, |
| "grad_norm": 1.2010020335474978, |
| "learning_rate": 9.974121684019695e-06, |
| "loss": 0.1608, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.06654991243432574, |
| "grad_norm": 0.8480134607436435, |
| "learning_rate": 9.972705244310445e-06, |
| "loss": 0.1524, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.06830122591943957, |
| "grad_norm": 1.2269951958743044, |
| "learning_rate": 9.97125117236889e-06, |
| "loss": 0.2164, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.07005253940455342, |
| "grad_norm": 1.5214916016344373, |
| "learning_rate": 9.969759479199093e-06, |
| "loss": 0.2573, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.07180385288966724, |
| "grad_norm": 1.3616317930694422, |
| "learning_rate": 9.968230176089828e-06, |
| "loss": 0.2272, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.07355516637478109, |
| "grad_norm": 1.1932422020281335, |
| "learning_rate": 9.966663274614495e-06, |
| "loss": 0.1567, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.07530647985989491, |
| "grad_norm": 0.8716927387571308, |
| "learning_rate": 9.965058786631022e-06, |
| "loss": 0.2293, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.07705779334500876, |
| "grad_norm": 1.3932388141794017, |
| "learning_rate": 9.963416724281787e-06, |
| "loss": 0.2286, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.07880910683012259, |
| "grad_norm": 1.2309788186288007, |
| "learning_rate": 9.961737099993523e-06, |
| "loss": 0.2633, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.08056042031523643, |
| "grad_norm": 1.0195652741277668, |
| "learning_rate": 9.960019926477218e-06, |
| "loss": 0.1664, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.08231173380035026, |
| "grad_norm": 0.805855610816426, |
| "learning_rate": 9.958265216728032e-06, |
| "loss": 0.1538, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.0840630472854641, |
| "grad_norm": 0.9295478292913552, |
| "learning_rate": 9.956472984025179e-06, |
| "loss": 0.154, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.08581436077057793, |
| "grad_norm": 1.1182679627713559, |
| "learning_rate": 9.954643241931845e-06, |
| "loss": 0.1946, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.08756567425569177, |
| "grad_norm": 0.9976680326317856, |
| "learning_rate": 9.952776004295077e-06, |
| "loss": 0.1705, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.0893169877408056, |
| "grad_norm": 0.9178402665093803, |
| "learning_rate": 9.95087128524568e-06, |
| "loss": 0.1583, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.09106830122591944, |
| "grad_norm": 0.9494960096058251, |
| "learning_rate": 9.948929099198104e-06, |
| "loss": 0.1779, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.09281961471103327, |
| "grad_norm": 1.546996916824689, |
| "learning_rate": 9.946949460850346e-06, |
| "loss": 0.2514, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.09457092819614711, |
| "grad_norm": 0.9923825116181026, |
| "learning_rate": 9.944932385183831e-06, |
| "loss": 0.1989, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.09632224168126094, |
| "grad_norm": 1.0402809673797797, |
| "learning_rate": 9.9428778874633e-06, |
| "loss": 0.2107, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.09807355516637478, |
| "grad_norm": 1.0825018590876776, |
| "learning_rate": 9.940785983236696e-06, |
| "loss": 0.1676, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.09982486865148861, |
| "grad_norm": 1.379406625027056, |
| "learning_rate": 9.938656688335045e-06, |
| "loss": 0.1991, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.10157618213660245, |
| "grad_norm": 1.0500448305369041, |
| "learning_rate": 9.936490018872336e-06, |
| "loss": 0.1695, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.10332749562171628, |
| "grad_norm": 1.1035051286979576, |
| "learning_rate": 9.934285991245399e-06, |
| "loss": 0.1939, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.10507880910683012, |
| "grad_norm": 0.9812842055285581, |
| "learning_rate": 9.932044622133785e-06, |
| "loss": 0.1937, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.10683012259194395, |
| "grad_norm": 0.9527544704706223, |
| "learning_rate": 9.929765928499635e-06, |
| "loss": 0.171, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.1085814360770578, |
| "grad_norm": 1.302925637235369, |
| "learning_rate": 9.927449927587549e-06, |
| "loss": 0.278, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.11033274956217162, |
| "grad_norm": 0.917614726426997, |
| "learning_rate": 9.925096636924465e-06, |
| "loss": 0.2583, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.11208406304728546, |
| "grad_norm": 1.1260675596481124, |
| "learning_rate": 9.922706074319517e-06, |
| "loss": 0.1954, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.1138353765323993, |
| "grad_norm": 0.9735158179996255, |
| "learning_rate": 9.920278257863904e-06, |
| "loss": 0.1688, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.11558669001751314, |
| "grad_norm": 1.2032784643650782, |
| "learning_rate": 9.917813205930758e-06, |
| "loss": 0.2189, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.11733800350262696, |
| "grad_norm": 1.1710648620955906, |
| "learning_rate": 9.915310937174995e-06, |
| "loss": 0.2043, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.1190893169877408, |
| "grad_norm": 1.0468308734796317, |
| "learning_rate": 9.91277147053318e-06, |
| "loss": 0.1912, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.12084063047285463, |
| "grad_norm": 1.0767180014359876, |
| "learning_rate": 9.910194825223384e-06, |
| "loss": 0.1916, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.12259194395796848, |
| "grad_norm": 1.157850250531908, |
| "learning_rate": 9.90758102074504e-06, |
| "loss": 0.1692, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.1243432574430823, |
| "grad_norm": 1.1978899308041546, |
| "learning_rate": 9.90493007687878e-06, |
| "loss": 0.1904, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.12609457092819615, |
| "grad_norm": 1.3308506120726233, |
| "learning_rate": 9.902242013686316e-06, |
| "loss": 0.2215, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.12784588441331, |
| "grad_norm": 1.186862457320643, |
| "learning_rate": 9.899516851510256e-06, |
| "loss": 0.2086, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.1295971978984238, |
| "grad_norm": 0.9921699627050491, |
| "learning_rate": 9.89675461097397e-06, |
| "loss": 0.1942, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.13134851138353765, |
| "grad_norm": 1.1944882918690307, |
| "learning_rate": 9.893955312981428e-06, |
| "loss": 0.1996, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.1330998248686515, |
| "grad_norm": 1.3595222631043964, |
| "learning_rate": 9.89111897871704e-06, |
| "loss": 0.2493, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.13485113835376533, |
| "grad_norm": 1.311168457583758, |
| "learning_rate": 9.888245629645502e-06, |
| "loss": 0.3023, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.13660245183887915, |
| "grad_norm": 1.2264268547734622, |
| "learning_rate": 9.885335287511621e-06, |
| "loss": 0.2375, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.138353765323993, |
| "grad_norm": 1.0615586907581638, |
| "learning_rate": 9.882387974340166e-06, |
| "loss": 0.2196, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.14010507880910683, |
| "grad_norm": 1.2113698183185242, |
| "learning_rate": 9.879403712435692e-06, |
| "loss": 0.2094, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.14185639229422067, |
| "grad_norm": 1.6599951150659369, |
| "learning_rate": 9.876382524382372e-06, |
| "loss": 0.2155, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.1436077057793345, |
| "grad_norm": 1.1421227311156412, |
| "learning_rate": 9.873324433043825e-06, |
| "loss": 0.2082, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.14535901926444833, |
| "grad_norm": 1.3566945133013182, |
| "learning_rate": 9.87022946156295e-06, |
| "loss": 0.221, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.14711033274956217, |
| "grad_norm": 1.3051526600631753, |
| "learning_rate": 9.867097633361745e-06, |
| "loss": 0.1775, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.14886164623467601, |
| "grad_norm": 0.8957821215184415, |
| "learning_rate": 9.863928972141127e-06, |
| "loss": 0.1868, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.15061295971978983, |
| "grad_norm": 0.8391246575100022, |
| "learning_rate": 9.860723501880758e-06, |
| "loss": 0.1299, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.15236427320490367, |
| "grad_norm": 1.0429245203527981, |
| "learning_rate": 9.857481246838867e-06, |
| "loss": 0.1631, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.15411558669001751, |
| "grad_norm": 0.9670342133099205, |
| "learning_rate": 9.854202231552052e-06, |
| "loss": 0.186, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.15586690017513136, |
| "grad_norm": 1.0108020188058704, |
| "learning_rate": 9.850886480835113e-06, |
| "loss": 0.2, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.15761821366024517, |
| "grad_norm": 1.1527622631935206, |
| "learning_rate": 9.847534019780848e-06, |
| "loss": 0.2098, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.159369527145359, |
| "grad_norm": 1.044844838210246, |
| "learning_rate": 9.844144873759874e-06, |
| "loss": 0.1915, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.16112084063047286, |
| "grad_norm": 1.4573741072487383, |
| "learning_rate": 9.840719068420427e-06, |
| "loss": 0.2642, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.1628721541155867, |
| "grad_norm": 1.091447061849621, |
| "learning_rate": 9.837256629688177e-06, |
| "loss": 0.1863, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.1646234676007005, |
| "grad_norm": 1.252517589764872, |
| "learning_rate": 9.833757583766025e-06, |
| "loss": 0.2034, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.16637478108581435, |
| "grad_norm": 1.1107281185356483, |
| "learning_rate": 9.830221957133903e-06, |
| "loss": 0.1832, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.1681260945709282, |
| "grad_norm": 0.976784716781447, |
| "learning_rate": 9.82664977654858e-06, |
| "loss": 0.1326, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.16987740805604204, |
| "grad_norm": 1.19891512446026, |
| "learning_rate": 9.823041069043457e-06, |
| "loss": 0.191, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.17162872154115585, |
| "grad_norm": 1.1984485781115666, |
| "learning_rate": 9.81939586192836e-06, |
| "loss": 0.2456, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.1733800350262697, |
| "grad_norm": 1.978454735318979, |
| "learning_rate": 9.815714182789335e-06, |
| "loss": 0.3018, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.17513134851138354, |
| "grad_norm": 1.016573147746523, |
| "learning_rate": 9.811996059488445e-06, |
| "loss": 0.2071, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.17688266199649738, |
| "grad_norm": 1.1362427606530137, |
| "learning_rate": 9.808241520163542e-06, |
| "loss": 0.1999, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.1786339754816112, |
| "grad_norm": 1.230279571459468, |
| "learning_rate": 9.804450593228079e-06, |
| "loss": 0.1463, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.18038528896672504, |
| "grad_norm": 0.868837318204682, |
| "learning_rate": 9.800623307370874e-06, |
| "loss": 0.1376, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.18213660245183888, |
| "grad_norm": 0.972725973754719, |
| "learning_rate": 9.7967596915559e-06, |
| "loss": 0.1922, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.18388791593695272, |
| "grad_norm": 0.8173644404176668, |
| "learning_rate": 9.792859775022069e-06, |
| "loss": 0.1299, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.18563922942206654, |
| "grad_norm": 1.0879870179146585, |
| "learning_rate": 9.788923587283008e-06, |
| "loss": 0.2063, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.18739054290718038, |
| "grad_norm": 0.952468881220675, |
| "learning_rate": 9.784951158126836e-06, |
| "loss": 0.1661, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.18914185639229422, |
| "grad_norm": 1.0914420674457646, |
| "learning_rate": 9.780942517615937e-06, |
| "loss": 0.1622, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.19089316987740806, |
| "grad_norm": 1.4948764206782315, |
| "learning_rate": 9.776897696086734e-06, |
| "loss": 0.2464, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.19264448336252188, |
| "grad_norm": 1.1347837540296297, |
| "learning_rate": 9.772816724149459e-06, |
| "loss": 0.159, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.19439579684763572, |
| "grad_norm": 1.3238181457613138, |
| "learning_rate": 9.768699632687922e-06, |
| "loss": 0.2241, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.19614711033274956, |
| "grad_norm": 1.042910339183979, |
| "learning_rate": 9.764546452859277e-06, |
| "loss": 0.1967, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.1978984238178634, |
| "grad_norm": 1.1785097761069256, |
| "learning_rate": 9.760357216093788e-06, |
| "loss": 0.1961, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.19964973730297722, |
| "grad_norm": 1.1403104414823952, |
| "learning_rate": 9.756131954094582e-06, |
| "loss": 0.2042, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.20140105078809106, |
| "grad_norm": 1.4022044273328718, |
| "learning_rate": 9.751870698837428e-06, |
| "loss": 0.258, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.2031523642732049, |
| "grad_norm": 1.1023466504712938, |
| "learning_rate": 9.747573482570471e-06, |
| "loss": 0.1867, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.20490367775831875, |
| "grad_norm": 1.013716455534731, |
| "learning_rate": 9.74324033781401e-06, |
| "loss": 0.235, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.20665499124343256, |
| "grad_norm": 1.1766050779187667, |
| "learning_rate": 9.738871297360233e-06, |
| "loss": 0.2042, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.2084063047285464, |
| "grad_norm": 1.1003072535572958, |
| "learning_rate": 9.734466394272988e-06, |
| "loss": 0.1945, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.21015761821366025, |
| "grad_norm": 1.4838997871202069, |
| "learning_rate": 9.730025661887517e-06, |
| "loss": 0.1961, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.2119089316987741, |
| "grad_norm": 1.2740923322292086, |
| "learning_rate": 9.725549133810205e-06, |
| "loss": 0.1872, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.2136602451838879, |
| "grad_norm": 1.0849396699876135, |
| "learning_rate": 9.721036843918343e-06, |
| "loss": 0.1887, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.21541155866900175, |
| "grad_norm": 1.171434657200328, |
| "learning_rate": 9.716488826359848e-06, |
| "loss": 0.1719, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.2171628721541156, |
| "grad_norm": 0.8144293180316944, |
| "learning_rate": 9.711905115553014e-06, |
| "loss": 0.1217, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.21891418563922943, |
| "grad_norm": 0.8287059128714208, |
| "learning_rate": 9.707285746186262e-06, |
| "loss": 0.126, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.22066549912434325, |
| "grad_norm": 1.0397181324925568, |
| "learning_rate": 9.702630753217865e-06, |
| "loss": 0.2018, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.2224168126094571, |
| "grad_norm": 1.2093454647956197, |
| "learning_rate": 9.697940171875682e-06, |
| "loss": 0.1819, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.22416812609457093, |
| "grad_norm": 0.971954663646255, |
| "learning_rate": 9.6932140376569e-06, |
| "loss": 0.1459, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.22591943957968477, |
| "grad_norm": 0.9871770951957114, |
| "learning_rate": 9.688452386327764e-06, |
| "loss": 0.1597, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.2276707530647986, |
| "grad_norm": 1.1914487524596076, |
| "learning_rate": 9.6836552539233e-06, |
| "loss": 0.1894, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.22942206654991243, |
| "grad_norm": 1.3299445049184178, |
| "learning_rate": 9.678822676747048e-06, |
| "loss": 0.21, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.23117338003502627, |
| "grad_norm": 0.9944443296898016, |
| "learning_rate": 9.673954691370782e-06, |
| "loss": 0.1933, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.2329246935201401, |
| "grad_norm": 1.2879765089518327, |
| "learning_rate": 9.669051334634243e-06, |
| "loss": 0.2187, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.23467600700525393, |
| "grad_norm": 1.1579504079908498, |
| "learning_rate": 9.66411264364485e-06, |
| "loss": 0.2593, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.23642732049036777, |
| "grad_norm": 0.889979983641812, |
| "learning_rate": 9.659138655777422e-06, |
| "loss": 0.1599, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.2381786339754816, |
| "grad_norm": 1.1392333425773118, |
| "learning_rate": 9.654129408673897e-06, |
| "loss": 0.2232, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.23992994746059546, |
| "grad_norm": 0.927182815890979, |
| "learning_rate": 9.649084940243052e-06, |
| "loss": 0.1547, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.24168126094570927, |
| "grad_norm": 1.0230754932044808, |
| "learning_rate": 9.644005288660204e-06, |
| "loss": 0.1552, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.2434325744308231, |
| "grad_norm": 1.0117195711045635, |
| "learning_rate": 9.638890492366924e-06, |
| "loss": 0.1563, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.24518388791593695, |
| "grad_norm": 1.4066019466468673, |
| "learning_rate": 9.633740590070763e-06, |
| "loss": 0.3144, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.2469352014010508, |
| "grad_norm": 1.1513768439474406, |
| "learning_rate": 9.628555620744932e-06, |
| "loss": 0.1434, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.2486865148861646, |
| "grad_norm": 1.0700849134487496, |
| "learning_rate": 9.62333562362803e-06, |
| "loss": 0.2161, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.2504378283712785, |
| "grad_norm": 1.5059188434701325, |
| "learning_rate": 9.618080638223732e-06, |
| "loss": 0.2005, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.2521891418563923, |
| "grad_norm": 0.9665925005797491, |
| "learning_rate": 9.612790704300501e-06, |
| "loss": 0.1828, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.2539404553415061, |
| "grad_norm": 0.8229122412140533, |
| "learning_rate": 9.607465861891276e-06, |
| "loss": 0.1208, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.25569176882662, |
| "grad_norm": 1.0488909683370946, |
| "learning_rate": 9.602106151293182e-06, |
| "loss": 0.2138, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.2574430823117338, |
| "grad_norm": 1.2990902146116425, |
| "learning_rate": 9.596711613067212e-06, |
| "loss": 0.2018, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.2591943957968476, |
| "grad_norm": 0.8880761437071694, |
| "learning_rate": 9.59128228803793e-06, |
| "loss": 0.1679, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.2609457092819615, |
| "grad_norm": 0.8863876920056281, |
| "learning_rate": 9.585818217293155e-06, |
| "loss": 0.1488, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.2626970227670753, |
| "grad_norm": 1.252288840375211, |
| "learning_rate": 9.580319442183654e-06, |
| "loss": 0.2294, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.26444833625218916, |
| "grad_norm": 1.1271115490692163, |
| "learning_rate": 9.574786004322831e-06, |
| "loss": 0.2005, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.266199649737303, |
| "grad_norm": 0.8958292187859824, |
| "learning_rate": 9.569217945586406e-06, |
| "loss": 0.1515, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.2679509632224168, |
| "grad_norm": 1.1432682883821648, |
| "learning_rate": 9.563615308112106e-06, |
| "loss": 0.1727, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.26970227670753066, |
| "grad_norm": 0.8860825973578063, |
| "learning_rate": 9.557978134299332e-06, |
| "loss": 0.1561, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.2714535901926445, |
| "grad_norm": 0.9458112069640355, |
| "learning_rate": 9.552306466808861e-06, |
| "loss": 0.173, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.2732049036777583, |
| "grad_norm": 1.1059388701307742, |
| "learning_rate": 9.546600348562499e-06, |
| "loss": 0.1939, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.27495621716287216, |
| "grad_norm": 1.3621793677790732, |
| "learning_rate": 9.54085982274277e-06, |
| "loss": 0.2033, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.276707530647986, |
| "grad_norm": 1.049168730239092, |
| "learning_rate": 9.535084932792588e-06, |
| "loss": 0.2193, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.27845884413309985, |
| "grad_norm": 0.8987325354112385, |
| "learning_rate": 9.529275722414926e-06, |
| "loss": 0.149, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.28021015761821366, |
| "grad_norm": 1.1205614005555482, |
| "learning_rate": 9.523432235572485e-06, |
| "loss": 0.1715, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.2819614711033275, |
| "grad_norm": 1.1119065744262058, |
| "learning_rate": 9.517554516487361e-06, |
| "loss": 0.2139, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.28371278458844135, |
| "grad_norm": 1.2120128153983245, |
| "learning_rate": 9.511642609640714e-06, |
| "loss": 0.2055, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.28546409807355516, |
| "grad_norm": 1.0381552882652774, |
| "learning_rate": 9.505696559772427e-06, |
| "loss": 0.1521, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.287215411558669, |
| "grad_norm": 1.0616048587072129, |
| "learning_rate": 9.499716411880767e-06, |
| "loss": 0.1438, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.28896672504378285, |
| "grad_norm": 1.465227437163341, |
| "learning_rate": 9.493702211222052e-06, |
| "loss": 0.1939, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.29071803852889666, |
| "grad_norm": 1.2171201787031805, |
| "learning_rate": 9.4876540033103e-06, |
| "loss": 0.1542, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.29246935201401053, |
| "grad_norm": 0.9937562205583209, |
| "learning_rate": 9.481571833916884e-06, |
| "loss": 0.1822, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.29422066549912435, |
| "grad_norm": 1.139810203249971, |
| "learning_rate": 9.475455749070198e-06, |
| "loss": 0.2018, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.29597197898423816, |
| "grad_norm": 1.2507741492130755, |
| "learning_rate": 9.469305795055292e-06, |
| "loss": 0.2314, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.29772329246935203, |
| "grad_norm": 1.6644506023322219, |
| "learning_rate": 9.463122018413533e-06, |
| "loss": 0.1912, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.29947460595446584, |
| "grad_norm": 0.8574109893402403, |
| "learning_rate": 9.45690446594225e-06, |
| "loss": 0.1236, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.30122591943957966, |
| "grad_norm": 0.9262386331879862, |
| "learning_rate": 9.450653184694378e-06, |
| "loss": 0.2005, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.30297723292469353, |
| "grad_norm": 0.7994796498712383, |
| "learning_rate": 9.444368221978102e-06, |
| "loss": 0.1488, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.30472854640980734, |
| "grad_norm": 0.9739129710543231, |
| "learning_rate": 9.438049625356506e-06, |
| "loss": 0.2011, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.3064798598949212, |
| "grad_norm": 1.7022881013682905, |
| "learning_rate": 9.431697442647199e-06, |
| "loss": 0.286, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.30823117338003503, |
| "grad_norm": 0.9765370631504982, |
| "learning_rate": 9.425311721921967e-06, |
| "loss": 0.1455, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.30998248686514884, |
| "grad_norm": 1.1358714034120214, |
| "learning_rate": 9.418892511506404e-06, |
| "loss": 0.1664, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.3117338003502627, |
| "grad_norm": 1.0393192528807746, |
| "learning_rate": 9.412439859979543e-06, |
| "loss": 0.162, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.3134851138353765, |
| "grad_norm": 1.096045737373684, |
| "learning_rate": 9.405953816173491e-06, |
| "loss": 0.1431, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.31523642732049034, |
| "grad_norm": 0.7637588357172228, |
| "learning_rate": 9.399434429173063e-06, |
| "loss": 0.1522, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.3169877408056042, |
| "grad_norm": 1.1511627286236419, |
| "learning_rate": 9.392881748315403e-06, |
| "loss": 0.23, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.318739054290718, |
| "grad_norm": 1.296531852821544, |
| "learning_rate": 9.38629582318962e-06, |
| "loss": 0.1559, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.3204903677758319, |
| "grad_norm": 1.0245849374018412, |
| "learning_rate": 9.379676703636402e-06, |
| "loss": 0.2058, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.3222416812609457, |
| "grad_norm": 1.1325995765106882, |
| "learning_rate": 9.373024439747648e-06, |
| "loss": 0.1798, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.3239929947460595, |
| "grad_norm": 0.955274718027506, |
| "learning_rate": 9.366339081866085e-06, |
| "loss": 0.1318, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.3257443082311734, |
| "grad_norm": 1.0960146910727295, |
| "learning_rate": 9.359620680584889e-06, |
| "loss": 0.2125, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.3274956217162872, |
| "grad_norm": 0.9976010826462164, |
| "learning_rate": 9.352869286747295e-06, |
| "loss": 0.1744, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.329246935201401, |
| "grad_norm": 1.0033232475788938, |
| "learning_rate": 9.34608495144622e-06, |
| "loss": 0.1712, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.3309982486865149, |
| "grad_norm": 1.2252146471565943, |
| "learning_rate": 9.33926772602388e-06, |
| "loss": 0.1983, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.3327495621716287, |
| "grad_norm": 1.1760605780656463, |
| "learning_rate": 9.332417662071386e-06, |
| "loss": 0.1666, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.3345008756567426, |
| "grad_norm": 1.3456892597616057, |
| "learning_rate": 9.32553481142837e-06, |
| "loss": 0.1829, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.3362521891418564, |
| "grad_norm": 1.2660460244764533, |
| "learning_rate": 9.31861922618258e-06, |
| "loss": 0.2458, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.3380035026269702, |
| "grad_norm": 1.0534767359485842, |
| "learning_rate": 9.311670958669502e-06, |
| "loss": 0.1874, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.3397548161120841, |
| "grad_norm": 0.8458278928313304, |
| "learning_rate": 9.304690061471937e-06, |
| "loss": 0.1667, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.3415061295971979, |
| "grad_norm": 0.9903682726455234, |
| "learning_rate": 9.297676587419638e-06, |
| "loss": 0.2062, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.3432574430823117, |
| "grad_norm": 1.1314930714580482, |
| "learning_rate": 9.290630589588876e-06, |
| "loss": 0.1794, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.3450087565674256, |
| "grad_norm": 1.463207590968482, |
| "learning_rate": 9.283552121302064e-06, |
| "loss": 0.2053, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.3467600700525394, |
| "grad_norm": 1.0264128857179728, |
| "learning_rate": 9.276441236127343e-06, |
| "loss": 0.1463, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.34851138353765326, |
| "grad_norm": 1.1530074129076198, |
| "learning_rate": 9.269297987878168e-06, |
| "loss": 0.1918, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.3502626970227671, |
| "grad_norm": 0.9958162716820419, |
| "learning_rate": 9.262122430612922e-06, |
| "loss": 0.1474, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.3520140105078809, |
| "grad_norm": 1.0886838150262181, |
| "learning_rate": 9.254914618634487e-06, |
| "loss": 0.2175, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.35376532399299476, |
| "grad_norm": 1.1036143709701502, |
| "learning_rate": 9.247674606489843e-06, |
| "loss": 0.141, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.3555166374781086, |
| "grad_norm": 1.0910090805563288, |
| "learning_rate": 9.240402448969655e-06, |
| "loss": 0.1638, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.3572679509632224, |
| "grad_norm": 1.0496526329879359, |
| "learning_rate": 9.233098201107854e-06, |
| "loss": 0.1745, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.35901926444833626, |
| "grad_norm": 1.0259634242913862, |
| "learning_rate": 9.225761918181224e-06, |
| "loss": 0.1554, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.3607705779334501, |
| "grad_norm": 0.9318692908968823, |
| "learning_rate": 9.218393655708981e-06, |
| "loss": 0.1598, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.36252189141856395, |
| "grad_norm": 1.0675242528447997, |
| "learning_rate": 9.210993469452357e-06, |
| "loss": 0.2542, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.36427320490367776, |
| "grad_norm": 1.0231337468264674, |
| "learning_rate": 9.203561415414174e-06, |
| "loss": 0.1377, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.3660245183887916, |
| "grad_norm": 0.8172962309380866, |
| "learning_rate": 9.196097549838422e-06, |
| "loss": 0.1337, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.36777583187390545, |
| "grad_norm": 1.3437004411676805, |
| "learning_rate": 9.188601929209836e-06, |
| "loss": 0.2003, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.36952714535901926, |
| "grad_norm": 0.9859223201620534, |
| "learning_rate": 9.181074610253457e-06, |
| "loss": 0.1246, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.3712784588441331, |
| "grad_norm": 1.1821125005665047, |
| "learning_rate": 9.173515649934222e-06, |
| "loss": 0.1918, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.37302977232924694, |
| "grad_norm": 1.3158062961704573, |
| "learning_rate": 9.165925105456513e-06, |
| "loss": 0.2639, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.37478108581436076, |
| "grad_norm": 1.3388147444695622, |
| "learning_rate": 9.15830303426374e-06, |
| "loss": 0.2246, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.37653239929947463, |
| "grad_norm": 0.8947375382446747, |
| "learning_rate": 9.150649494037895e-06, |
| "loss": 0.1225, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.37828371278458844, |
| "grad_norm": 1.103951490161312, |
| "learning_rate": 9.142964542699124e-06, |
| "loss": 0.2026, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.38003502626970226, |
| "grad_norm": 1.233924530688903, |
| "learning_rate": 9.135248238405282e-06, |
| "loss": 0.2086, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.38178633975481613, |
| "grad_norm": 1.1719877250784767, |
| "learning_rate": 9.127500639551497e-06, |
| "loss": 0.1795, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.38353765323992994, |
| "grad_norm": 1.255963695109249, |
| "learning_rate": 9.119721804769723e-06, |
| "loss": 0.1862, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.38528896672504376, |
| "grad_norm": 1.1459951961009553, |
| "learning_rate": 9.111911792928308e-06, |
| "loss": 0.1966, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.38704028021015763, |
| "grad_norm": 1.1761484428825753, |
| "learning_rate": 9.10407066313153e-06, |
| "loss": 0.1868, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.38879159369527144, |
| "grad_norm": 1.118245234998823, |
| "learning_rate": 9.096198474719169e-06, |
| "loss": 0.189, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.3905429071803853, |
| "grad_norm": 1.1403077273341966, |
| "learning_rate": 9.088295287266042e-06, |
| "loss": 0.1752, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.3922942206654991, |
| "grad_norm": 1.1241899523938508, |
| "learning_rate": 9.080361160581569e-06, |
| "loss": 0.163, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.39404553415061294, |
| "grad_norm": 1.1244116274618798, |
| "learning_rate": 9.0723961547093e-06, |
| "loss": 0.1644, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.3957968476357268, |
| "grad_norm": 1.1452039316203921, |
| "learning_rate": 9.064400329926476e-06, |
| "loss": 0.1677, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.3975481611208406, |
| "grad_norm": 0.9169411289803756, |
| "learning_rate": 9.05637374674357e-06, |
| "loss": 0.1499, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.39929947460595444, |
| "grad_norm": 1.1320705178616042, |
| "learning_rate": 9.048316465903823e-06, |
| "loss": 0.1532, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.4010507880910683, |
| "grad_norm": 1.1148728898419542, |
| "learning_rate": 9.04022854838279e-06, |
| "loss": 0.1745, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.4028021015761821, |
| "grad_norm": 0.9892668796106193, |
| "learning_rate": 9.032110055387881e-06, |
| "loss": 0.192, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.404553415061296, |
| "grad_norm": 1.2041757680734857, |
| "learning_rate": 9.023961048357885e-06, |
| "loss": 0.2152, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.4063047285464098, |
| "grad_norm": 1.1128606421374905, |
| "learning_rate": 9.015781588962524e-06, |
| "loss": 0.273, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.4080560420315236, |
| "grad_norm": 1.0110467377360022, |
| "learning_rate": 9.007571739101968e-06, |
| "loss": 0.1625, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.4098073555166375, |
| "grad_norm": 2.1812012536277385, |
| "learning_rate": 8.999331560906382e-06, |
| "loss": 0.437, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.4115586690017513, |
| "grad_norm": 1.2026156956732903, |
| "learning_rate": 8.991061116735437e-06, |
| "loss": 0.1843, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.4133099824868651, |
| "grad_norm": 0.9335774970781566, |
| "learning_rate": 8.982760469177865e-06, |
| "loss": 0.1648, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.415061295971979, |
| "grad_norm": 0.9632646131071537, |
| "learning_rate": 8.974429681050957e-06, |
| "loss": 0.234, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.4168126094570928, |
| "grad_norm": 1.1628290027919184, |
| "learning_rate": 8.966068815400108e-06, |
| "loss": 0.243, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.4185639229422067, |
| "grad_norm": 0.9926771048841662, |
| "learning_rate": 8.957677935498328e-06, |
| "loss": 0.2181, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.4203152364273205, |
| "grad_norm": 1.2100489148340519, |
| "learning_rate": 8.949257104845772e-06, |
| "loss": 0.1799, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.4220665499124343, |
| "grad_norm": 0.9686376813257808, |
| "learning_rate": 8.94080638716925e-06, |
| "loss": 0.1412, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.4238178633975482, |
| "grad_norm": 1.0304072209793849, |
| "learning_rate": 8.932325846421755e-06, |
| "loss": 0.1608, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.425569176882662, |
| "grad_norm": 1.1068555551888453, |
| "learning_rate": 8.923815546781968e-06, |
| "loss": 0.1736, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.4273204903677758, |
| "grad_norm": 1.1406345353775849, |
| "learning_rate": 8.915275552653786e-06, |
| "loss": 0.1856, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.4290718038528897, |
| "grad_norm": 0.824906662216677, |
| "learning_rate": 8.906705928665818e-06, |
| "loss": 0.1241, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.4308231173380035, |
| "grad_norm": 0.9817782743515178, |
| "learning_rate": 8.898106739670908e-06, |
| "loss": 0.1391, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.43257443082311736, |
| "grad_norm": 1.0061540033779128, |
| "learning_rate": 8.889478050745646e-06, |
| "loss": 0.1487, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.4343257443082312, |
| "grad_norm": 1.319181249331738, |
| "learning_rate": 8.88081992718986e-06, |
| "loss": 0.2516, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.436077057793345, |
| "grad_norm": 1.0168251468902272, |
| "learning_rate": 8.872132434526144e-06, |
| "loss": 0.1601, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.43782837127845886, |
| "grad_norm": 0.903893586373318, |
| "learning_rate": 8.863415638499341e-06, |
| "loss": 0.1675, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.4395796847635727, |
| "grad_norm": 1.5466100134300305, |
| "learning_rate": 8.854669605076058e-06, |
| "loss": 0.2004, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.4413309982486865, |
| "grad_norm": 0.8990563567570562, |
| "learning_rate": 8.845894400444163e-06, |
| "loss": 0.1505, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.44308231173380036, |
| "grad_norm": 1.2358463965854751, |
| "learning_rate": 8.837090091012289e-06, |
| "loss": 0.188, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.4448336252189142, |
| "grad_norm": 0.8631731423566544, |
| "learning_rate": 8.82825674340932e-06, |
| "loss": 0.1085, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.44658493870402804, |
| "grad_norm": 0.851054134724772, |
| "learning_rate": 8.819394424483898e-06, |
| "loss": 0.1553, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.44833625218914186, |
| "grad_norm": 0.9747188919128223, |
| "learning_rate": 8.810503201303914e-06, |
| "loss": 0.1429, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.4500875656742557, |
| "grad_norm": 1.1817384021065651, |
| "learning_rate": 8.801583141155993e-06, |
| "loss": 0.1714, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.45183887915936954, |
| "grad_norm": 1.1497461858404032, |
| "learning_rate": 8.792634311545002e-06, |
| "loss": 0.1654, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.45359019264448336, |
| "grad_norm": 1.0653546734905857, |
| "learning_rate": 8.78365678019352e-06, |
| "loss": 0.1561, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.4553415061295972, |
| "grad_norm": 0.9098419050905309, |
| "learning_rate": 8.774650615041332e-06, |
| "loss": 0.1446, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.45709281961471104, |
| "grad_norm": 1.3562833349770014, |
| "learning_rate": 8.765615884244925e-06, |
| "loss": 0.1887, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.45884413309982486, |
| "grad_norm": 1.584928142177982, |
| "learning_rate": 8.75655265617696e-06, |
| "loss": 0.2126, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.46059544658493873, |
| "grad_norm": 1.25237114313114, |
| "learning_rate": 8.747460999425755e-06, |
| "loss": 0.1999, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.46234676007005254, |
| "grad_norm": 1.0137290535234078, |
| "learning_rate": 8.738340982794775e-06, |
| "loss": 0.1567, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.46409807355516636, |
| "grad_norm": 0.9990455785944722, |
| "learning_rate": 8.729192675302104e-06, |
| "loss": 0.1817, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.4658493870402802, |
| "grad_norm": 0.9098201735226615, |
| "learning_rate": 8.720016146179921e-06, |
| "loss": 0.181, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.46760070052539404, |
| "grad_norm": 0.8339374481166864, |
| "learning_rate": 8.710811464873984e-06, |
| "loss": 0.13, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.46935201401050786, |
| "grad_norm": 0.8247199593689756, |
| "learning_rate": 8.701578701043097e-06, |
| "loss": 0.116, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.4711033274956217, |
| "grad_norm": 1.2251926045855088, |
| "learning_rate": 8.692317924558586e-06, |
| "loss": 0.2267, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.47285464098073554, |
| "grad_norm": 1.323949068367743, |
| "learning_rate": 8.683029205503772e-06, |
| "loss": 0.2413, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.4746059544658494, |
| "grad_norm": 0.8694038746666335, |
| "learning_rate": 8.67371261417344e-06, |
| "loss": 0.1314, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.4763572679509632, |
| "grad_norm": 1.4214200720496517, |
| "learning_rate": 8.664368221073297e-06, |
| "loss": 0.2074, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.47810858143607704, |
| "grad_norm": 0.8164989303563484, |
| "learning_rate": 8.65499609691946e-06, |
| "loss": 0.1142, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.4798598949211909, |
| "grad_norm": 0.9950353997188668, |
| "learning_rate": 8.645596312637895e-06, |
| "loss": 0.2059, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.4816112084063047, |
| "grad_norm": 1.1741807018487027, |
| "learning_rate": 8.636168939363905e-06, |
| "loss": 0.1762, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.48336252189141854, |
| "grad_norm": 1.1639496884105414, |
| "learning_rate": 8.62671404844157e-06, |
| "loss": 0.1946, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.4851138353765324, |
| "grad_norm": 1.24592549170423, |
| "learning_rate": 8.617231711423222e-06, |
| "loss": 0.2481, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.4868651488616462, |
| "grad_norm": 1.6788228325648948, |
| "learning_rate": 8.607722000068898e-06, |
| "loss": 0.2368, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.4886164623467601, |
| "grad_norm": 0.9120565830493937, |
| "learning_rate": 8.598184986345797e-06, |
| "loss": 0.1528, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.4903677758318739, |
| "grad_norm": 1.0407774323531322, |
| "learning_rate": 8.588620742427733e-06, |
| "loss": 0.1481, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.4921190893169877, |
| "grad_norm": 0.8774818082676015, |
| "learning_rate": 8.579029340694596e-06, |
| "loss": 0.167, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.4938704028021016, |
| "grad_norm": 0.7302486166777703, |
| "learning_rate": 8.569410853731799e-06, |
| "loss": 0.1339, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.4956217162872154, |
| "grad_norm": 1.4977744846377492, |
| "learning_rate": 8.559765354329728e-06, |
| "loss": 0.2384, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.4973730297723292, |
| "grad_norm": 1.3333579162401141, |
| "learning_rate": 8.55009291548319e-06, |
| "loss": 0.2047, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.4991243432574431, |
| "grad_norm": 1.0371512553867137, |
| "learning_rate": 8.540393610390871e-06, |
| "loss": 0.2014, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.500875656742557, |
| "grad_norm": 5.507551141823795, |
| "learning_rate": 8.530667512454765e-06, |
| "loss": 0.2963, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.5026269702276708, |
| "grad_norm": 1.0814488316677124, |
| "learning_rate": 8.520914695279632e-06, |
| "loss": 0.1799, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.5043782837127846, |
| "grad_norm": 1.0877989498247866, |
| "learning_rate": 8.511135232672442e-06, |
| "loss": 0.2273, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.5061295971978984, |
| "grad_norm": 1.1546522986634278, |
| "learning_rate": 8.501329198641802e-06, |
| "loss": 0.1699, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.5078809106830122, |
| "grad_norm": 0.9955478078874541, |
| "learning_rate": 8.491496667397409e-06, |
| "loss": 0.1616, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.5096322241681261, |
| "grad_norm": 1.0749336259458449, |
| "learning_rate": 8.481637713349486e-06, |
| "loss": 0.2121, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.51138353765324, |
| "grad_norm": 1.2722939684715049, |
| "learning_rate": 8.471752411108216e-06, |
| "loss": 0.1619, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.5131348511383538, |
| "grad_norm": 1.31720477548223, |
| "learning_rate": 8.461840835483179e-06, |
| "loss": 0.2357, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.5148861646234676, |
| "grad_norm": 1.0512404750610869, |
| "learning_rate": 8.451903061482787e-06, |
| "loss": 0.2039, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.5166374781085814, |
| "grad_norm": 1.095035695042105, |
| "learning_rate": 8.44193916431371e-06, |
| "loss": 0.1386, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.5183887915936952, |
| "grad_norm": 1.2777241490218374, |
| "learning_rate": 8.431949219380319e-06, |
| "loss": 0.2109, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.5201401050788091, |
| "grad_norm": 1.0873909511926902, |
| "learning_rate": 8.421933302284102e-06, |
| "loss": 0.1584, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.521891418563923, |
| "grad_norm": 0.9773041895311751, |
| "learning_rate": 8.411891488823102e-06, |
| "loss": 0.1512, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.5236427320490368, |
| "grad_norm": 1.1584559931137173, |
| "learning_rate": 8.401823854991338e-06, |
| "loss": 0.1974, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.5253940455341506, |
| "grad_norm": 0.9617777428924835, |
| "learning_rate": 8.391730476978229e-06, |
| "loss": 0.1536, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.5271453590192644, |
| "grad_norm": 1.1847455857829146, |
| "learning_rate": 8.381611431168027e-06, |
| "loss": 0.1949, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.5288966725043783, |
| "grad_norm": 1.0558725080734255, |
| "learning_rate": 8.37146679413922e-06, |
| "loss": 0.1873, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.5306479859894921, |
| "grad_norm": 0.9708895644033165, |
| "learning_rate": 8.361296642663977e-06, |
| "loss": 0.1851, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.532399299474606, |
| "grad_norm": 1.0628725551453226, |
| "learning_rate": 8.351101053707545e-06, |
| "loss": 0.169, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.5341506129597198, |
| "grad_norm": 0.9196106999391176, |
| "learning_rate": 8.34088010442768e-06, |
| "loss": 0.159, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.5359019264448336, |
| "grad_norm": 1.0255932075584107, |
| "learning_rate": 8.330633872174057e-06, |
| "loss": 0.1701, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.5376532399299475, |
| "grad_norm": 1.0762624625214643, |
| "learning_rate": 8.320362434487688e-06, |
| "loss": 0.1644, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.5394045534150613, |
| "grad_norm": 0.7633404300233255, |
| "learning_rate": 8.310065869100332e-06, |
| "loss": 0.123, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.5411558669001751, |
| "grad_norm": 0.8699176281503174, |
| "learning_rate": 8.299744253933908e-06, |
| "loss": 0.1351, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.542907180385289, |
| "grad_norm": 1.2275049904001125, |
| "learning_rate": 8.289397667099909e-06, |
| "loss": 0.1675, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.5446584938704028, |
| "grad_norm": 1.2101635020956292, |
| "learning_rate": 8.279026186898805e-06, |
| "loss": 0.1738, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.5464098073555166, |
| "grad_norm": 0.9106260951604767, |
| "learning_rate": 8.26862989181945e-06, |
| "loss": 0.1338, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.5481611208406305, |
| "grad_norm": 1.1480139615147442, |
| "learning_rate": 8.258208860538498e-06, |
| "loss": 0.1958, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.5499124343257443, |
| "grad_norm": 1.543005528446332, |
| "learning_rate": 8.247763171919795e-06, |
| "loss": 0.1611, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.5516637478108581, |
| "grad_norm": 1.201047458056005, |
| "learning_rate": 8.237292905013792e-06, |
| "loss": 0.2219, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.553415061295972, |
| "grad_norm": 0.9962169779650556, |
| "learning_rate": 8.226798139056938e-06, |
| "loss": 0.1751, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.5551663747810858, |
| "grad_norm": 1.0391141659299086, |
| "learning_rate": 8.216278953471088e-06, |
| "loss": 0.1907, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.5569176882661997, |
| "grad_norm": 0.8878841272733307, |
| "learning_rate": 8.205735427862897e-06, |
| "loss": 0.1392, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.5586690017513135, |
| "grad_norm": 0.8815771076681298, |
| "learning_rate": 8.19516764202322e-06, |
| "loss": 0.2054, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.5604203152364273, |
| "grad_norm": 1.3561357204987174, |
| "learning_rate": 8.184575675926511e-06, |
| "loss": 0.162, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.5621716287215411, |
| "grad_norm": 1.1436127474045497, |
| "learning_rate": 8.173959609730209e-06, |
| "loss": 0.1553, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.563922942206655, |
| "grad_norm": 0.8696831532462574, |
| "learning_rate": 8.16331952377414e-06, |
| "loss": 0.1614, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.5656742556917689, |
| "grad_norm": 1.243551733126445, |
| "learning_rate": 8.152655498579903e-06, |
| "loss": 0.241, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.5674255691768827, |
| "grad_norm": 1.0649177969550163, |
| "learning_rate": 8.141967614850265e-06, |
| "loss": 0.1368, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.5691768826619965, |
| "grad_norm": 1.1762554635110793, |
| "learning_rate": 8.131255953468553e-06, |
| "loss": 0.2207, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.5709281961471103, |
| "grad_norm": 1.2132153902490277, |
| "learning_rate": 8.120520595498029e-06, |
| "loss": 0.1887, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.5726795096322241, |
| "grad_norm": 0.7399066544679602, |
| "learning_rate": 8.10976162218129e-06, |
| "loss": 0.1267, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.574430823117338, |
| "grad_norm": 0.9899573210939664, |
| "learning_rate": 8.09897911493965e-06, |
| "loss": 0.1685, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.5761821366024519, |
| "grad_norm": 1.3337051985288606, |
| "learning_rate": 8.088173155372517e-06, |
| "loss": 0.3282, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.5779334500875657, |
| "grad_norm": 1.005083830754978, |
| "learning_rate": 8.077343825256783e-06, |
| "loss": 0.2075, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.5796847635726795, |
| "grad_norm": 0.8802289529518278, |
| "learning_rate": 8.066491206546206e-06, |
| "loss": 0.1254, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.5814360770577933, |
| "grad_norm": 1.2400889336140388, |
| "learning_rate": 8.055615381370781e-06, |
| "loss": 0.185, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.5831873905429071, |
| "grad_norm": 0.8545769679622639, |
| "learning_rate": 8.044716432036126e-06, |
| "loss": 0.1352, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.5849387040280211, |
| "grad_norm": 1.2907830562873122, |
| "learning_rate": 8.033794441022857e-06, |
| "loss": 0.2143, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.5866900175131349, |
| "grad_norm": 1.2934031358525784, |
| "learning_rate": 8.022849490985966e-06, |
| "loss": 0.2373, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.5884413309982487, |
| "grad_norm": 1.0494212000075114, |
| "learning_rate": 8.011881664754193e-06, |
| "loss": 0.21, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.5901926444833625, |
| "grad_norm": 1.3713545247159777, |
| "learning_rate": 8.000891045329394e-06, |
| "loss": 0.1956, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.5919439579684763, |
| "grad_norm": 1.0044217822469004, |
| "learning_rate": 7.989877715885925e-06, |
| "loss": 0.1455, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.5936952714535902, |
| "grad_norm": 0.969157755217139, |
| "learning_rate": 7.97884175977e-06, |
| "loss": 0.1858, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.5954465849387041, |
| "grad_norm": 0.9043027733845134, |
| "learning_rate": 7.967783260499073e-06, |
| "loss": 0.1307, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.5971978984238179, |
| "grad_norm": 0.7943124415991508, |
| "learning_rate": 7.956702301761195e-06, |
| "loss": 0.1142, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.5989492119089317, |
| "grad_norm": 0.9004231757301827, |
| "learning_rate": 7.945598967414386e-06, |
| "loss": 0.1908, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.6007005253940455, |
| "grad_norm": 1.1105437759236843, |
| "learning_rate": 7.934473341485998e-06, |
| "loss": 0.2115, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.6024518388791593, |
| "grad_norm": 0.9301936250430058, |
| "learning_rate": 7.92332550817208e-06, |
| "loss": 0.1647, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.6042031523642732, |
| "grad_norm": 1.047767148226068, |
| "learning_rate": 7.912155551836743e-06, |
| "loss": 0.2355, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.6059544658493871, |
| "grad_norm": 0.9613875662244317, |
| "learning_rate": 7.900963557011519e-06, |
| "loss": 0.171, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.6077057793345009, |
| "grad_norm": 1.2164758034474923, |
| "learning_rate": 7.88974960839472e-06, |
| "loss": 0.155, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.6094570928196147, |
| "grad_norm": 1.056374791415413, |
| "learning_rate": 7.878513790850805e-06, |
| "loss": 0.1732, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.6112084063047285, |
| "grad_norm": 1.2496494090012524, |
| "learning_rate": 7.867256189409724e-06, |
| "loss": 0.1835, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.6129597197898424, |
| "grad_norm": 0.6631622478113742, |
| "learning_rate": 7.855976889266288e-06, |
| "loss": 0.118, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.6147110332749562, |
| "grad_norm": 0.834904245415055, |
| "learning_rate": 7.844675975779514e-06, |
| "loss": 0.1363, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.6164623467600701, |
| "grad_norm": 1.0623504091270612, |
| "learning_rate": 7.833353534471988e-06, |
| "loss": 0.1341, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.6182136602451839, |
| "grad_norm": 1.002067391498751, |
| "learning_rate": 7.82200965102921e-06, |
| "loss": 0.1388, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.6199649737302977, |
| "grad_norm": 1.2517283119177405, |
| "learning_rate": 7.810644411298951e-06, |
| "loss": 0.2027, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.6217162872154116, |
| "grad_norm": 0.9691375625821814, |
| "learning_rate": 7.799257901290597e-06, |
| "loss": 0.1918, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.6234676007005254, |
| "grad_norm": 1.257025304991851, |
| "learning_rate": 7.787850207174512e-06, |
| "loss": 0.1984, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.6252189141856392, |
| "grad_norm": 1.2989884834762626, |
| "learning_rate": 7.776421415281368e-06, |
| "loss": 0.2251, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.626970227670753, |
| "grad_norm": 0.9608457991453937, |
| "learning_rate": 7.764971612101497e-06, |
| "loss": 0.1598, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.6287215411558669, |
| "grad_norm": 1.055564436388202, |
| "learning_rate": 7.753500884284251e-06, |
| "loss": 0.1506, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.6304728546409807, |
| "grad_norm": 0.8536479488591544, |
| "learning_rate": 7.742009318637323e-06, |
| "loss": 0.1023, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.6322241681260946, |
| "grad_norm": 1.1845728329626128, |
| "learning_rate": 7.730497002126105e-06, |
| "loss": 0.1584, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.6339754816112084, |
| "grad_norm": 0.8928523838563781, |
| "learning_rate": 7.718964021873035e-06, |
| "loss": 0.1084, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.6357267950963222, |
| "grad_norm": 0.9835590981476029, |
| "learning_rate": 7.707410465156916e-06, |
| "loss": 0.1638, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.637478108581436, |
| "grad_norm": 0.9201578437929335, |
| "learning_rate": 7.695836419412277e-06, |
| "loss": 0.1239, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.6392294220665499, |
| "grad_norm": 0.728467005507231, |
| "learning_rate": 7.684241972228702e-06, |
| "loss": 0.1244, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.6409807355516638, |
| "grad_norm": 0.918030927901545, |
| "learning_rate": 7.672627211350164e-06, |
| "loss": 0.1466, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.6427320490367776, |
| "grad_norm": 1.2979769917204351, |
| "learning_rate": 7.660992224674371e-06, |
| "loss": 0.2255, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.6444833625218914, |
| "grad_norm": 0.771343645637159, |
| "learning_rate": 7.649337100252091e-06, |
| "loss": 0.1293, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.6462346760070052, |
| "grad_norm": 1.1242763102043734, |
| "learning_rate": 7.637661926286493e-06, |
| "loss": 0.2268, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.647985989492119, |
| "grad_norm": 1.024519873593415, |
| "learning_rate": 7.625966791132469e-06, |
| "loss": 0.1664, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.649737302977233, |
| "grad_norm": 0.9346453233331181, |
| "learning_rate": 7.614251783295981e-06, |
| "loss": 0.1493, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.6514886164623468, |
| "grad_norm": 1.032430577115044, |
| "learning_rate": 7.602516991433376e-06, |
| "loss": 0.168, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.6532399299474606, |
| "grad_norm": 1.2756452983998474, |
| "learning_rate": 7.590762504350729e-06, |
| "loss": 0.2004, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.6549912434325744, |
| "grad_norm": 1.0952734861105042, |
| "learning_rate": 7.578988411003156e-06, |
| "loss": 0.2038, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.6567425569176882, |
| "grad_norm": 0.9982958673516226, |
| "learning_rate": 7.567194800494154e-06, |
| "loss": 0.1722, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.658493870402802, |
| "grad_norm": 1.2747360607076716, |
| "learning_rate": 7.555381762074918e-06, |
| "loss": 0.1977, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.660245183887916, |
| "grad_norm": 1.48637993886011, |
| "learning_rate": 7.543549385143667e-06, |
| "loss": 0.2751, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.6619964973730298, |
| "grad_norm": 1.0127675566111833, |
| "learning_rate": 7.531697759244978e-06, |
| "loss": 0.1556, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.6637478108581436, |
| "grad_norm": 1.0835339789681326, |
| "learning_rate": 7.519826974069088e-06, |
| "loss": 0.1884, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.6654991243432574, |
| "grad_norm": 0.9837633602612155, |
| "learning_rate": 7.507937119451234e-06, |
| "loss": 0.1823, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.6672504378283712, |
| "grad_norm": 1.392684179495506, |
| "learning_rate": 7.496028285370966e-06, |
| "loss": 0.1912, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.6690017513134852, |
| "grad_norm": 1.0916850999558239, |
| "learning_rate": 7.484100561951459e-06, |
| "loss": 0.2066, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.670753064798599, |
| "grad_norm": 0.8680971975778679, |
| "learning_rate": 7.472154039458851e-06, |
| "loss": 0.1754, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.6725043782837128, |
| "grad_norm": 1.0435891343464805, |
| "learning_rate": 7.460188808301532e-06, |
| "loss": 0.1318, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.6742556917688266, |
| "grad_norm": 1.042398115064006, |
| "learning_rate": 7.448204959029484e-06, |
| "loss": 0.2022, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.6760070052539404, |
| "grad_norm": 0.9731481469841672, |
| "learning_rate": 7.436202582333587e-06, |
| "loss": 0.13, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.6777583187390543, |
| "grad_norm": 1.386034337965727, |
| "learning_rate": 7.4241817690449235e-06, |
| "loss": 0.2216, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.6795096322241682, |
| "grad_norm": 1.2608682646343083, |
| "learning_rate": 7.41214261013411e-06, |
| "loss": 0.1966, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.681260945709282, |
| "grad_norm": 1.1020654673893056, |
| "learning_rate": 7.40008519671059e-06, |
| "loss": 0.1762, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.6830122591943958, |
| "grad_norm": 1.0019379870434075, |
| "learning_rate": 7.3880096200219585e-06, |
| "loss": 0.1436, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.6847635726795096, |
| "grad_norm": 0.9922002827434665, |
| "learning_rate": 7.375915971453264e-06, |
| "loss": 0.159, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.6865148861646234, |
| "grad_norm": 1.2392846396033714, |
| "learning_rate": 7.363804342526315e-06, |
| "loss": 0.1972, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.6882661996497373, |
| "grad_norm": 0.8963807218487707, |
| "learning_rate": 7.3516748248989955e-06, |
| "loss": 0.1921, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.6900175131348512, |
| "grad_norm": 1.1130741139227058, |
| "learning_rate": 7.339527510364567e-06, |
| "loss": 0.1459, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.691768826619965, |
| "grad_norm": 0.8075594361508924, |
| "learning_rate": 7.327362490850971e-06, |
| "loss": 0.1379, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.6935201401050788, |
| "grad_norm": 1.0062548542126621, |
| "learning_rate": 7.315179858420138e-06, |
| "loss": 0.1758, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.6952714535901926, |
| "grad_norm": 0.9717163669556567, |
| "learning_rate": 7.302979705267286e-06, |
| "loss": 0.1538, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.6970227670753065, |
| "grad_norm": 1.0413579747819772, |
| "learning_rate": 7.2907621237202275e-06, |
| "loss": 0.1535, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.6987740805604203, |
| "grad_norm": 0.9776202668633781, |
| "learning_rate": 7.2785272062386715e-06, |
| "loss": 0.2237, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.7005253940455342, |
| "grad_norm": 0.9919980255734399, |
| "learning_rate": 7.266275045413517e-06, |
| "loss": 0.1759, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.702276707530648, |
| "grad_norm": 1.1333937112664494, |
| "learning_rate": 7.254005733966159e-06, |
| "loss": 0.26, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.7040280210157618, |
| "grad_norm": 0.9978679010728084, |
| "learning_rate": 7.241719364747781e-06, |
| "loss": 0.146, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.7057793345008757, |
| "grad_norm": 1.0310148677742963, |
| "learning_rate": 7.229416030738661e-06, |
| "loss": 0.1358, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.7075306479859895, |
| "grad_norm": 0.9210280630631851, |
| "learning_rate": 7.217095825047455e-06, |
| "loss": 0.1368, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.7092819614711033, |
| "grad_norm": 0.8458172902391173, |
| "learning_rate": 7.204758840910509e-06, |
| "loss": 0.1548, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.7110332749562172, |
| "grad_norm": 1.0007666191428417, |
| "learning_rate": 7.192405171691138e-06, |
| "loss": 0.1358, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.712784588441331, |
| "grad_norm": 1.1791422782221483, |
| "learning_rate": 7.180034910878926e-06, |
| "loss": 0.2027, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.7145359019264448, |
| "grad_norm": 1.3622226936019624, |
| "learning_rate": 7.167648152089017e-06, |
| "loss": 0.1446, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.7162872154115587, |
| "grad_norm": 1.260377975347423, |
| "learning_rate": 7.155244989061415e-06, |
| "loss": 0.1394, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.7180385288966725, |
| "grad_norm": 0.7647098429722478, |
| "learning_rate": 7.142825515660259e-06, |
| "loss": 0.1436, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.7197898423817863, |
| "grad_norm": 1.0806203976791042, |
| "learning_rate": 7.130389825873125e-06, |
| "loss": 0.1472, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.7215411558669002, |
| "grad_norm": 1.160580416868669, |
| "learning_rate": 7.1179380138103105e-06, |
| "loss": 0.1709, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.723292469352014, |
| "grad_norm": 1.2669865482460725, |
| "learning_rate": 7.105470173704121e-06, |
| "loss": 0.1692, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.7250437828371279, |
| "grad_norm": 0.8979827897826866, |
| "learning_rate": 7.092986399908158e-06, |
| "loss": 0.1538, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.7267950963222417, |
| "grad_norm": 1.1613876219039976, |
| "learning_rate": 7.08048678689661e-06, |
| "loss": 0.1627, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.7285464098073555, |
| "grad_norm": 0.7088309545286363, |
| "learning_rate": 7.067971429263527e-06, |
| "loss": 0.0933, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.7302977232924693, |
| "grad_norm": 0.9953579341793583, |
| "learning_rate": 7.055440421722113e-06, |
| "loss": 0.1546, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.7320490367775832, |
| "grad_norm": 0.893498402926208, |
| "learning_rate": 7.042893859104008e-06, |
| "loss": 0.1647, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.7338003502626971, |
| "grad_norm": 0.9606538728371828, |
| "learning_rate": 7.030331836358565e-06, |
| "loss": 0.1584, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.7355516637478109, |
| "grad_norm": 0.9087697947323283, |
| "learning_rate": 7.017754448552141e-06, |
| "loss": 0.1489, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.7373029772329247, |
| "grad_norm": 1.0980936242324373, |
| "learning_rate": 7.0051617908673685e-06, |
| "loss": 0.1909, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.7390542907180385, |
| "grad_norm": 1.0679212943502743, |
| "learning_rate": 6.992553958602439e-06, |
| "loss": 0.1933, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.7408056042031523, |
| "grad_norm": 1.0201469343275764, |
| "learning_rate": 6.979931047170382e-06, |
| "loss": 0.19, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.7425569176882661, |
| "grad_norm": 1.153726254550419, |
| "learning_rate": 6.967293152098345e-06, |
| "loss": 0.2164, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.7443082311733801, |
| "grad_norm": 0.9986655789682201, |
| "learning_rate": 6.954640369026861e-06, |
| "loss": 0.1681, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.7460595446584939, |
| "grad_norm": 0.7808715269507883, |
| "learning_rate": 6.941972793709141e-06, |
| "loss": 0.1427, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.7478108581436077, |
| "grad_norm": 0.9806851736858181, |
| "learning_rate": 6.929290522010332e-06, |
| "loss": 0.1558, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.7495621716287215, |
| "grad_norm": 1.1151483469529613, |
| "learning_rate": 6.9165936499068065e-06, |
| "loss": 0.1851, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.7513134851138353, |
| "grad_norm": 1.0226642337566068, |
| "learning_rate": 6.903882273485425e-06, |
| "loss": 0.1406, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.7530647985989493, |
| "grad_norm": 1.2154853234808298, |
| "learning_rate": 6.891156488942812e-06, |
| "loss": 0.2281, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.7548161120840631, |
| "grad_norm": 0.9124967173578454, |
| "learning_rate": 6.878416392584635e-06, |
| "loss": 0.1502, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.7565674255691769, |
| "grad_norm": 1.4028442036532214, |
| "learning_rate": 6.865662080824864e-06, |
| "loss": 0.2161, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.7583187390542907, |
| "grad_norm": 0.975649535916596, |
| "learning_rate": 6.852893650185051e-06, |
| "loss": 0.1655, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.7600700525394045, |
| "grad_norm": 1.1154571470663182, |
| "learning_rate": 6.840111197293594e-06, |
| "loss": 0.2768, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.7618213660245184, |
| "grad_norm": 1.2620416059011632, |
| "learning_rate": 6.8273148188850105e-06, |
| "loss": 0.2549, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.7635726795096323, |
| "grad_norm": 0.9710561696847746, |
| "learning_rate": 6.814504611799202e-06, |
| "loss": 0.1068, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.7653239929947461, |
| "grad_norm": 0.8701266642614385, |
| "learning_rate": 6.801680672980722e-06, |
| "loss": 0.1272, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.7670753064798599, |
| "grad_norm": 1.1987353253288306, |
| "learning_rate": 6.788843099478041e-06, |
| "loss": 0.2027, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.7688266199649737, |
| "grad_norm": 1.1590697629080275, |
| "learning_rate": 6.775991988442816e-06, |
| "loss": 0.2143, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.7705779334500875, |
| "grad_norm": 1.0846847919101628, |
| "learning_rate": 6.763127437129151e-06, |
| "loss": 0.1705, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.7723292469352014, |
| "grad_norm": 0.8580193306506241, |
| "learning_rate": 6.750249542892863e-06, |
| "loss": 0.175, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.7740805604203153, |
| "grad_norm": 1.5775617568317908, |
| "learning_rate": 6.737358403190746e-06, |
| "loss": 0.2081, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.7758318739054291, |
| "grad_norm": 0.9624673340593134, |
| "learning_rate": 6.724454115579832e-06, |
| "loss": 0.1098, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.7775831873905429, |
| "grad_norm": 1.0607534980440978, |
| "learning_rate": 6.711536777716654e-06, |
| "loss": 0.1803, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.7793345008756567, |
| "grad_norm": 1.1255975448505335, |
| "learning_rate": 6.698606487356503e-06, |
| "loss": 0.1872, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.7810858143607706, |
| "grad_norm": 0.8928366518808328, |
| "learning_rate": 6.685663342352693e-06, |
| "loss": 0.1466, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.7828371278458844, |
| "grad_norm": 1.0817838600370688, |
| "learning_rate": 6.6727074406558224e-06, |
| "loss": 0.1663, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.7845884413309983, |
| "grad_norm": 1.1115633011949881, |
| "learning_rate": 6.659738880313025e-06, |
| "loss": 0.1598, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.7863397548161121, |
| "grad_norm": 1.0231529929725236, |
| "learning_rate": 6.646757759467234e-06, |
| "loss": 0.155, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.7880910683012259, |
| "grad_norm": 1.2411539450968938, |
| "learning_rate": 6.633764176356434e-06, |
| "loss": 0.1915, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.7898423817863398, |
| "grad_norm": 1.0708800647966268, |
| "learning_rate": 6.620758229312927e-06, |
| "loss": 0.1385, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.7915936952714536, |
| "grad_norm": 1.0683250200759222, |
| "learning_rate": 6.6077400167625784e-06, |
| "loss": 0.1663, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.7933450087565674, |
| "grad_norm": 1.250391733683125, |
| "learning_rate": 6.594709637224075e-06, |
| "loss": 0.1996, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.7950963222416813, |
| "grad_norm": 1.1798794300569202, |
| "learning_rate": 6.581667189308185e-06, |
| "loss": 0.146, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.7968476357267951, |
| "grad_norm": 1.4398604943836388, |
| "learning_rate": 6.5686127717170015e-06, |
| "loss": 0.3225, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.7985989492119089, |
| "grad_norm": 0.9313043884553712, |
| "learning_rate": 6.555546483243205e-06, |
| "loss": 0.1389, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.8003502626970228, |
| "grad_norm": 1.1833167749805182, |
| "learning_rate": 6.542468422769311e-06, |
| "loss": 0.2136, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.8021015761821366, |
| "grad_norm": 0.8648334244503824, |
| "learning_rate": 6.529378689266923e-06, |
| "loss": 0.1878, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.8038528896672504, |
| "grad_norm": 0.9949458854079241, |
| "learning_rate": 6.516277381795984e-06, |
| "loss": 0.1497, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.8056042031523643, |
| "grad_norm": 1.1191303302941829, |
| "learning_rate": 6.503164599504022e-06, |
| "loss": 0.1566, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.8073555166374781, |
| "grad_norm": 1.1298654961289165, |
| "learning_rate": 6.490040441625407e-06, |
| "loss": 0.2017, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.809106830122592, |
| "grad_norm": 1.0947278566055827, |
| "learning_rate": 6.476905007480597e-06, |
| "loss": 0.1525, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.8108581436077058, |
| "grad_norm": 1.1323194203448552, |
| "learning_rate": 6.4637583964753855e-06, |
| "loss": 0.2241, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.8126094570928196, |
| "grad_norm": 1.0459409005734945, |
| "learning_rate": 6.45060070810015e-06, |
| "loss": 0.1296, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.8143607705779334, |
| "grad_norm": 1.1316198669484385, |
| "learning_rate": 6.437432041929097e-06, |
| "loss": 0.1621, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.8161120840630472, |
| "grad_norm": 0.9491729705590622, |
| "learning_rate": 6.424252497619511e-06, |
| "loss": 0.1547, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.8178633975481612, |
| "grad_norm": 1.0195018137674068, |
| "learning_rate": 6.4110621749110014e-06, |
| "loss": 0.1424, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.819614711033275, |
| "grad_norm": 1.2170884375327042, |
| "learning_rate": 6.397861173624745e-06, |
| "loss": 0.2018, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.8213660245183888, |
| "grad_norm": 1.5918684168233677, |
| "learning_rate": 6.384649593662733e-06, |
| "loss": 0.1759, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.8231173380035026, |
| "grad_norm": 1.1612346799052706, |
| "learning_rate": 6.371427535007008e-06, |
| "loss": 0.1944, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.8248686514886164, |
| "grad_norm": 1.01990361540596, |
| "learning_rate": 6.358195097718917e-06, |
| "loss": 0.2028, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.8266199649737302, |
| "grad_norm": 0.9233804242151922, |
| "learning_rate": 6.344952381938354e-06, |
| "loss": 0.1768, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.8283712784588442, |
| "grad_norm": 1.1968348714557342, |
| "learning_rate": 6.331699487882987e-06, |
| "loss": 0.1657, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.830122591943958, |
| "grad_norm": 1.4115850068938127, |
| "learning_rate": 6.318436515847525e-06, |
| "loss": 0.2006, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.8318739054290718, |
| "grad_norm": 0.9507911762810964, |
| "learning_rate": 6.30516356620293e-06, |
| "loss": 0.1495, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.8336252189141856, |
| "grad_norm": 1.1430728607474907, |
| "learning_rate": 6.291880739395683e-06, |
| "loss": 0.1722, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.8353765323992994, |
| "grad_norm": 1.343500691590876, |
| "learning_rate": 6.278588135947011e-06, |
| "loss": 0.2047, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.8371278458844134, |
| "grad_norm": 1.2595243490259276, |
| "learning_rate": 6.265285856452123e-06, |
| "loss": 0.2214, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.8388791593695272, |
| "grad_norm": 1.1669616297664058, |
| "learning_rate": 6.251974001579459e-06, |
| "loss": 0.1724, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.840630472854641, |
| "grad_norm": 1.0844521569107943, |
| "learning_rate": 6.238652672069921e-06, |
| "loss": 0.1961, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.8423817863397548, |
| "grad_norm": 0.8709327722146551, |
| "learning_rate": 6.225321968736114e-06, |
| "loss": 0.1118, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.8441330998248686, |
| "grad_norm": 1.0680058091791873, |
| "learning_rate": 6.211981992461583e-06, |
| "loss": 0.1579, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.8458844133099825, |
| "grad_norm": 1.0598391865687347, |
| "learning_rate": 6.1986328442000425e-06, |
| "loss": 0.2064, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.8476357267950964, |
| "grad_norm": 0.8766034007901955, |
| "learning_rate": 6.185274624974627e-06, |
| "loss": 0.1729, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.8493870402802102, |
| "grad_norm": 1.0572455452106584, |
| "learning_rate": 6.1719074358771095e-06, |
| "loss": 0.1506, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.851138353765324, |
| "grad_norm": 1.191222790209331, |
| "learning_rate": 6.158531378067151e-06, |
| "loss": 0.2654, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.8528896672504378, |
| "grad_norm": 1.4428690121550316, |
| "learning_rate": 6.145146552771526e-06, |
| "loss": 0.1961, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.8546409807355516, |
| "grad_norm": 1.4737339805651568, |
| "learning_rate": 6.13175306128336e-06, |
| "loss": 0.2061, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.8563922942206655, |
| "grad_norm": 0.8182297459502176, |
| "learning_rate": 6.118351004961361e-06, |
| "loss": 0.1507, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.8581436077057794, |
| "grad_norm": 0.8288780085158164, |
| "learning_rate": 6.104940485229055e-06, |
| "loss": 0.13, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.8598949211908932, |
| "grad_norm": 1.1292187916757546, |
| "learning_rate": 6.091521603574016e-06, |
| "loss": 0.1324, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.861646234676007, |
| "grad_norm": 1.0078907033539166, |
| "learning_rate": 6.0780944615471016e-06, |
| "loss": 0.1468, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.8633975481611208, |
| "grad_norm": 1.182790806253496, |
| "learning_rate": 6.064659160761676e-06, |
| "loss": 0.1444, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.8651488616462347, |
| "grad_norm": 1.2426011389183171, |
| "learning_rate": 6.051215802892855e-06, |
| "loss": 0.1864, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.8669001751313485, |
| "grad_norm": 0.9102336236721412, |
| "learning_rate": 6.03776448967672e-06, |
| "loss": 0.1639, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.8686514886164624, |
| "grad_norm": 0.8523410941646513, |
| "learning_rate": 6.024305322909565e-06, |
| "loss": 0.1501, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.8704028021015762, |
| "grad_norm": 0.8389091979384604, |
| "learning_rate": 6.0108384044471115e-06, |
| "loss": 0.1543, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.87215411558669, |
| "grad_norm": 0.9283364061815252, |
| "learning_rate": 5.997363836203744e-06, |
| "loss": 0.179, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.8739054290718039, |
| "grad_norm": 1.610431915265918, |
| "learning_rate": 5.983881720151743e-06, |
| "loss": 0.211, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.8756567425569177, |
| "grad_norm": 0.9193134177440736, |
| "learning_rate": 5.970392158320505e-06, |
| "loss": 0.171, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.8756567425569177, |
| "eval_loss": 0.18447460234165192, |
| "eval_runtime": 1.9261, |
| "eval_samples_per_second": 24.401, |
| "eval_steps_per_second": 6.23, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.8774080560420315, |
| "grad_norm": 1.5249299930362135, |
| "learning_rate": 5.956895252795778e-06, |
| "loss": 0.2216, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.8791593695271454, |
| "grad_norm": 0.9421038979843945, |
| "learning_rate": 5.943391105718883e-06, |
| "loss": 0.172, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.8809106830122592, |
| "grad_norm": 1.0939585065581912, |
| "learning_rate": 5.9298798192859434e-06, |
| "loss": 0.1562, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.882661996497373, |
| "grad_norm": 1.3902162886272154, |
| "learning_rate": 5.91636149574711e-06, |
| "loss": 0.2069, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.8844133099824869, |
| "grad_norm": 0.9650701747967442, |
| "learning_rate": 5.902836237405791e-06, |
| "loss": 0.1716, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.8861646234676007, |
| "grad_norm": 1.083475666382535, |
| "learning_rate": 5.889304146617878e-06, |
| "loss": 0.1473, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.8879159369527145, |
| "grad_norm": 0.832482252729812, |
| "learning_rate": 5.875765325790963e-06, |
| "loss": 0.1003, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.8896672504378283, |
| "grad_norm": 1.096304873507722, |
| "learning_rate": 5.8622198773835725e-06, |
| "loss": 0.1779, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.8914185639229422, |
| "grad_norm": 0.6978941340044295, |
| "learning_rate": 5.8486679039043895e-06, |
| "loss": 0.0972, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.8931698774080561, |
| "grad_norm": 0.9622846696182903, |
| "learning_rate": 5.835109507911475e-06, |
| "loss": 0.1651, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.8949211908931699, |
| "grad_norm": 1.2738814460850292, |
| "learning_rate": 5.821544792011495e-06, |
| "loss": 0.1897, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.8966725043782837, |
| "grad_norm": 1.1576090771526317, |
| "learning_rate": 5.807973858858947e-06, |
| "loss": 0.1617, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.8984238178633975, |
| "grad_norm": 1.0945933042426927, |
| "learning_rate": 5.794396811155372e-06, |
| "loss": 0.2411, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.9001751313485113, |
| "grad_norm": 1.0071156666849748, |
| "learning_rate": 5.780813751648589e-06, |
| "loss": 0.145, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.9019264448336253, |
| "grad_norm": 1.0608443685972735, |
| "learning_rate": 5.76722478313191e-06, |
| "loss": 0.1602, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.9036777583187391, |
| "grad_norm": 0.8351677418619291, |
| "learning_rate": 5.753630008443371e-06, |
| "loss": 0.1649, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.9054290718038529, |
| "grad_norm": 1.0252931582190568, |
| "learning_rate": 5.740029530464941e-06, |
| "loss": 0.1208, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.9071803852889667, |
| "grad_norm": 1.0954354367499803, |
| "learning_rate": 5.726423452121751e-06, |
| "loss": 0.1731, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.9089316987740805, |
| "grad_norm": 0.9910985812759849, |
| "learning_rate": 5.712811876381318e-06, |
| "loss": 0.185, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.9106830122591943, |
| "grad_norm": 0.9431196895717147, |
| "learning_rate": 5.699194906252761e-06, |
| "loss": 0.203, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.9124343257443083, |
| "grad_norm": 0.7613500880928905, |
| "learning_rate": 5.685572644786016e-06, |
| "loss": 0.1142, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.9141856392294221, |
| "grad_norm": 0.9561738575523392, |
| "learning_rate": 5.671945195071075e-06, |
| "loss": 0.1474, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.9159369527145359, |
| "grad_norm": 1.0351076685823428, |
| "learning_rate": 5.65831266023718e-06, |
| "loss": 0.1973, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.9176882661996497, |
| "grad_norm": 1.2139682431989942, |
| "learning_rate": 5.644675143452065e-06, |
| "loss": 0.2251, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.9194395796847635, |
| "grad_norm": 1.1432766151339988, |
| "learning_rate": 5.631032747921165e-06, |
| "loss": 0.2148, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.9211908931698775, |
| "grad_norm": 0.8384192320224637, |
| "learning_rate": 5.617385576886829e-06, |
| "loss": 0.124, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.9229422066549913, |
| "grad_norm": 0.7477784220288675, |
| "learning_rate": 5.603733733627559e-06, |
| "loss": 0.1244, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.9246935201401051, |
| "grad_norm": 0.8136618533131901, |
| "learning_rate": 5.5900773214572016e-06, |
| "loss": 0.1652, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.9264448336252189, |
| "grad_norm": 0.9923459165132333, |
| "learning_rate": 5.576416443724187e-06, |
| "loss": 0.1719, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.9281961471103327, |
| "grad_norm": 1.2728780007916458, |
| "learning_rate": 5.562751203810742e-06, |
| "loss": 0.1844, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.9299474605954466, |
| "grad_norm": 0.9234166515823709, |
| "learning_rate": 5.5490817051320964e-06, |
| "loss": 0.1612, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.9316987740805605, |
| "grad_norm": 1.0361866511885336, |
| "learning_rate": 5.535408051135721e-06, |
| "loss": 0.1428, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.9334500875656743, |
| "grad_norm": 0.9374383845356417, |
| "learning_rate": 5.5217303453005225e-06, |
| "loss": 0.1787, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.9352014010507881, |
| "grad_norm": 1.0824506841717698, |
| "learning_rate": 5.508048691136075e-06, |
| "loss": 0.1846, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.9369527145359019, |
| "grad_norm": 1.2681101493420375, |
| "learning_rate": 5.4943631921818365e-06, |
| "loss": 0.1857, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.9387040280210157, |
| "grad_norm": 0.8870751536018538, |
| "learning_rate": 5.480673952006355e-06, |
| "loss": 0.1893, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.9404553415061296, |
| "grad_norm": 0.8698308010341032, |
| "learning_rate": 5.466981074206493e-06, |
| "loss": 0.1576, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.9422066549912435, |
| "grad_norm": 1.1493313359852635, |
| "learning_rate": 5.453284662406646e-06, |
| "loss": 0.1915, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.9439579684763573, |
| "grad_norm": 0.9597895796258253, |
| "learning_rate": 5.439584820257949e-06, |
| "loss": 0.1799, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.9457092819614711, |
| "grad_norm": 0.9439514805423415, |
| "learning_rate": 5.425881651437499e-06, |
| "loss": 0.1466, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.9474605954465849, |
| "grad_norm": 0.9974103027552716, |
| "learning_rate": 5.412175259647567e-06, |
| "loss": 0.1623, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.9492119089316988, |
| "grad_norm": 0.8671577413147288, |
| "learning_rate": 5.398465748614815e-06, |
| "loss": 0.1989, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.9509632224168126, |
| "grad_norm": 0.8733255413441741, |
| "learning_rate": 5.384753222089515e-06, |
| "loss": 0.1228, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.9527145359019265, |
| "grad_norm": 1.3146328909933664, |
| "learning_rate": 5.371037783844752e-06, |
| "loss": 0.2122, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.9544658493870403, |
| "grad_norm": 1.1770390858844189, |
| "learning_rate": 5.357319537675655e-06, |
| "loss": 0.2062, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.9562171628721541, |
| "grad_norm": 1.0962088909147447, |
| "learning_rate": 5.3435985873985926e-06, |
| "loss": 0.188, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.957968476357268, |
| "grad_norm": 1.2179282929078772, |
| "learning_rate": 5.329875036850406e-06, |
| "loss": 0.1765, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.9597197898423818, |
| "grad_norm": 1.069605096067716, |
| "learning_rate": 5.31614898988761e-06, |
| "loss": 0.1565, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.9614711033274956, |
| "grad_norm": 1.3962825793212799, |
| "learning_rate": 5.302420550385612e-06, |
| "loss": 0.2066, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.9632224168126094, |
| "grad_norm": 1.008584413162853, |
| "learning_rate": 5.28868982223793e-06, |
| "loss": 0.1698, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.9649737302977233, |
| "grad_norm": 0.998912078130381, |
| "learning_rate": 5.274956909355395e-06, |
| "loss": 0.179, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.9667250437828371, |
| "grad_norm": 0.9297277664472026, |
| "learning_rate": 5.261221915665375e-06, |
| "loss": 0.1184, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.968476357267951, |
| "grad_norm": 1.185642958138451, |
| "learning_rate": 5.247484945110988e-06, |
| "loss": 0.1932, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.9702276707530648, |
| "grad_norm": 1.088829726983837, |
| "learning_rate": 5.233746101650308e-06, |
| "loss": 0.2206, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.9719789842381786, |
| "grad_norm": 1.0987062412828756, |
| "learning_rate": 5.220005489255583e-06, |
| "loss": 0.1554, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.9737302977232924, |
| "grad_norm": 1.0323763199957168, |
| "learning_rate": 5.20626321191245e-06, |
| "loss": 0.1546, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.9754816112084063, |
| "grad_norm": 0.9972604317206961, |
| "learning_rate": 5.192519373619145e-06, |
| "loss": 0.1742, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.9772329246935202, |
| "grad_norm": 1.1779226698001648, |
| "learning_rate": 5.1787740783857164e-06, |
| "loss": 0.1969, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.978984238178634, |
| "grad_norm": 1.1733643102354534, |
| "learning_rate": 5.165027430233239e-06, |
| "loss": 0.138, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.9807355516637478, |
| "grad_norm": 1.0272420360834542, |
| "learning_rate": 5.151279533193027e-06, |
| "loss": 0.1705, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.9824868651488616, |
| "grad_norm": 0.904519502078042, |
| "learning_rate": 5.137530491305844e-06, |
| "loss": 0.1255, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.9842381786339754, |
| "grad_norm": 0.9629383999654443, |
| "learning_rate": 5.123780408621118e-06, |
| "loss": 0.1659, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.9859894921190894, |
| "grad_norm": 1.3124846848756935, |
| "learning_rate": 5.110029389196155e-06, |
| "loss": 0.1844, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.9877408056042032, |
| "grad_norm": 1.0676108653291219, |
| "learning_rate": 5.096277537095348e-06, |
| "loss": 0.2078, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.989492119089317, |
| "grad_norm": 0.9382113481780886, |
| "learning_rate": 5.082524956389394e-06, |
| "loss": 0.1409, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.9912434325744308, |
| "grad_norm": 0.9936394728663424, |
| "learning_rate": 5.0687717511545e-06, |
| "loss": 0.2063, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.9929947460595446, |
| "grad_norm": 1.031862421429508, |
| "learning_rate": 5.055018025471602e-06, |
| "loss": 0.1595, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.9947460595446584, |
| "grad_norm": 1.3217031605637353, |
| "learning_rate": 5.0412638834255755e-06, |
| "loss": 0.1276, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.9964973730297724, |
| "grad_norm": 1.1933242590091766, |
| "learning_rate": 5.027509429104443e-06, |
| "loss": 0.1923, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.9982486865148862, |
| "grad_norm": 0.8661254447213783, |
| "learning_rate": 5.013754766598599e-06, |
| "loss": 0.1724, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.1986284951562434, |
| "learning_rate": 5e-06, |
| "loss": 0.1998, |
| "step": 571 |
| }, |
| { |
| "epoch": 1.001751313485114, |
| "grad_norm": 0.7771391696807222, |
| "learning_rate": 4.986245233401403e-06, |
| "loss": 0.1012, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.0035026269702276, |
| "grad_norm": 0.6737848363676279, |
| "learning_rate": 4.9724905708955575e-06, |
| "loss": 0.0784, |
| "step": 573 |
| }, |
| { |
| "epoch": 1.0052539404553416, |
| "grad_norm": 0.6454263064830977, |
| "learning_rate": 4.958736116574426e-06, |
| "loss": 0.0818, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.0070052539404553, |
| "grad_norm": 0.6725211290025607, |
| "learning_rate": 4.9449819745284e-06, |
| "loss": 0.0843, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.0087565674255692, |
| "grad_norm": 0.8469678132359898, |
| "learning_rate": 4.931228248845502e-06, |
| "loss": 0.1477, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.010507880910683, |
| "grad_norm": 0.7442182106279001, |
| "learning_rate": 4.9174750436106076e-06, |
| "loss": 0.0892, |
| "step": 577 |
| }, |
| { |
| "epoch": 1.0122591943957968, |
| "grad_norm": 0.7468024313770749, |
| "learning_rate": 4.903722462904653e-06, |
| "loss": 0.0948, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.0140105078809107, |
| "grad_norm": 0.7166080541638878, |
| "learning_rate": 4.889970610803845e-06, |
| "loss": 0.0991, |
| "step": 579 |
| }, |
| { |
| "epoch": 1.0157618213660244, |
| "grad_norm": 0.7762519026289294, |
| "learning_rate": 4.8762195913788825e-06, |
| "loss": 0.0774, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.0175131348511384, |
| "grad_norm": 0.808868055025971, |
| "learning_rate": 4.862469508694157e-06, |
| "loss": 0.1099, |
| "step": 581 |
| }, |
| { |
| "epoch": 1.0192644483362523, |
| "grad_norm": 0.5819265746424057, |
| "learning_rate": 4.8487204668069735e-06, |
| "loss": 0.0695, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.021015761821366, |
| "grad_norm": 0.6156072907006408, |
| "learning_rate": 4.834972569766762e-06, |
| "loss": 0.0838, |
| "step": 583 |
| }, |
| { |
| "epoch": 1.02276707530648, |
| "grad_norm": 0.7635944643048334, |
| "learning_rate": 4.8212259216142835e-06, |
| "loss": 0.104, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.0245183887915936, |
| "grad_norm": 0.7637571188691638, |
| "learning_rate": 4.8074806263808565e-06, |
| "loss": 0.0708, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.0262697022767076, |
| "grad_norm": 0.9551589547852135, |
| "learning_rate": 4.7937367880875514e-06, |
| "loss": 0.1281, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.0280210157618215, |
| "grad_norm": 0.7293559825967365, |
| "learning_rate": 4.779994510744419e-06, |
| "loss": 0.0912, |
| "step": 587 |
| }, |
| { |
| "epoch": 1.0297723292469352, |
| "grad_norm": 0.8566789761394177, |
| "learning_rate": 4.766253898349694e-06, |
| "loss": 0.1606, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.031523642732049, |
| "grad_norm": 0.6627533346656157, |
| "learning_rate": 4.752515054889012e-06, |
| "loss": 0.0718, |
| "step": 589 |
| }, |
| { |
| "epoch": 1.0332749562171628, |
| "grad_norm": 0.5815945433400204, |
| "learning_rate": 4.738778084334625e-06, |
| "loss": 0.0695, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.0350262697022767, |
| "grad_norm": 0.760838011156959, |
| "learning_rate": 4.725043090644606e-06, |
| "loss": 0.0884, |
| "step": 591 |
| }, |
| { |
| "epoch": 1.0367775831873904, |
| "grad_norm": 0.7483668041512324, |
| "learning_rate": 4.711310177762072e-06, |
| "loss": 0.0669, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.0385288966725044, |
| "grad_norm": 0.8799384240976829, |
| "learning_rate": 4.697579449614389e-06, |
| "loss": 0.0998, |
| "step": 593 |
| }, |
| { |
| "epoch": 1.0402802101576183, |
| "grad_norm": 0.8082415948151013, |
| "learning_rate": 4.683851010112391e-06, |
| "loss": 0.0985, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.042031523642732, |
| "grad_norm": 0.6580267314761, |
| "learning_rate": 4.670124963149596e-06, |
| "loss": 0.1115, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.043782837127846, |
| "grad_norm": 0.7118138502690213, |
| "learning_rate": 4.656401412601408e-06, |
| "loss": 0.0662, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.0455341506129596, |
| "grad_norm": 0.7285566494410792, |
| "learning_rate": 4.642680462324348e-06, |
| "loss": 0.1036, |
| "step": 597 |
| }, |
| { |
| "epoch": 1.0472854640980735, |
| "grad_norm": 0.6952238511426204, |
| "learning_rate": 4.628962216155249e-06, |
| "loss": 0.0882, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.0490367775831875, |
| "grad_norm": 0.7543264293172796, |
| "learning_rate": 4.615246777910485e-06, |
| "loss": 0.087, |
| "step": 599 |
| }, |
| { |
| "epoch": 1.0507880910683012, |
| "grad_norm": 0.6191909832818346, |
| "learning_rate": 4.6015342513851854e-06, |
| "loss": 0.0745, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.052539404553415, |
| "grad_norm": 0.922433858210772, |
| "learning_rate": 4.587824740352435e-06, |
| "loss": 0.1058, |
| "step": 601 |
| }, |
| { |
| "epoch": 1.0542907180385288, |
| "grad_norm": 0.6821453050325335, |
| "learning_rate": 4.5741183485625044e-06, |
| "loss": 0.0771, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.0560420315236427, |
| "grad_norm": 0.9489221869695271, |
| "learning_rate": 4.560415179742052e-06, |
| "loss": 0.0955, |
| "step": 603 |
| }, |
| { |
| "epoch": 1.0577933450087567, |
| "grad_norm": 0.7026344715382692, |
| "learning_rate": 4.546715337593354e-06, |
| "loss": 0.0819, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.0595446584938704, |
| "grad_norm": 0.6671067987720858, |
| "learning_rate": 4.533018925793508e-06, |
| "loss": 0.0727, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.0612959719789843, |
| "grad_norm": 0.8642821874175421, |
| "learning_rate": 4.519326047993647e-06, |
| "loss": 0.0937, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.063047285464098, |
| "grad_norm": 0.8793543522695341, |
| "learning_rate": 4.505636807818166e-06, |
| "loss": 0.1301, |
| "step": 607 |
| }, |
| { |
| "epoch": 1.064798598949212, |
| "grad_norm": 0.8237519754604328, |
| "learning_rate": 4.491951308863926e-06, |
| "loss": 0.0825, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.0665499124343258, |
| "grad_norm": 0.8459623510016205, |
| "learning_rate": 4.478269654699478e-06, |
| "loss": 0.0821, |
| "step": 609 |
| }, |
| { |
| "epoch": 1.0683012259194395, |
| "grad_norm": 0.8255877830028168, |
| "learning_rate": 4.464591948864281e-06, |
| "loss": 0.0842, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.0700525394045535, |
| "grad_norm": 0.8718483520086847, |
| "learning_rate": 4.4509182948679035e-06, |
| "loss": 0.0821, |
| "step": 611 |
| }, |
| { |
| "epoch": 1.0718038528896672, |
| "grad_norm": 0.9897123154664441, |
| "learning_rate": 4.43724879618926e-06, |
| "loss": 0.1109, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.073555166374781, |
| "grad_norm": 0.8636401435184293, |
| "learning_rate": 4.423583556275814e-06, |
| "loss": 0.0904, |
| "step": 613 |
| }, |
| { |
| "epoch": 1.0753064798598948, |
| "grad_norm": 0.8916326658281433, |
| "learning_rate": 4.409922678542799e-06, |
| "loss": 0.0695, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.0770577933450087, |
| "grad_norm": 0.6527711082304235, |
| "learning_rate": 4.396266266372443e-06, |
| "loss": 0.0512, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.0788091068301227, |
| "grad_norm": 0.7314392682365592, |
| "learning_rate": 4.382614423113171e-06, |
| "loss": 0.0772, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.0805604203152364, |
| "grad_norm": 0.7896716564759495, |
| "learning_rate": 4.368967252078838e-06, |
| "loss": 0.0837, |
| "step": 617 |
| }, |
| { |
| "epoch": 1.0823117338003503, |
| "grad_norm": 0.8904478092266286, |
| "learning_rate": 4.355324856547936e-06, |
| "loss": 0.0984, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.084063047285464, |
| "grad_norm": 0.7997582578314417, |
| "learning_rate": 4.341687339762822e-06, |
| "loss": 0.0719, |
| "step": 619 |
| }, |
| { |
| "epoch": 1.085814360770578, |
| "grad_norm": 0.9328053345880932, |
| "learning_rate": 4.3280548049289275e-06, |
| "loss": 0.1102, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.0875656742556918, |
| "grad_norm": 0.7402218059098891, |
| "learning_rate": 4.314427355213984e-06, |
| "loss": 0.0882, |
| "step": 621 |
| }, |
| { |
| "epoch": 1.0893169877408055, |
| "grad_norm": 1.0242119598597839, |
| "learning_rate": 4.3008050937472424e-06, |
| "loss": 0.0971, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.0910683012259195, |
| "grad_norm": 1.078920725370126, |
| "learning_rate": 4.2871881236186835e-06, |
| "loss": 0.1252, |
| "step": 623 |
| }, |
| { |
| "epoch": 1.0928196147110332, |
| "grad_norm": 0.8096017533492185, |
| "learning_rate": 4.273576547878252e-06, |
| "loss": 0.0717, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.094570928196147, |
| "grad_norm": 0.7940757501651525, |
| "learning_rate": 4.259970469535061e-06, |
| "loss": 0.1207, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.096322241681261, |
| "grad_norm": 0.6730235221485272, |
| "learning_rate": 4.24636999155663e-06, |
| "loss": 0.0642, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.0980735551663747, |
| "grad_norm": 0.8410423617949386, |
| "learning_rate": 4.2327752168680904e-06, |
| "loss": 0.1123, |
| "step": 627 |
| }, |
| { |
| "epoch": 1.0998248686514887, |
| "grad_norm": 0.7165164918449669, |
| "learning_rate": 4.219186248351413e-06, |
| "loss": 0.1079, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.1015761821366024, |
| "grad_norm": 0.9046144772499766, |
| "learning_rate": 4.20560318884463e-06, |
| "loss": 0.0889, |
| "step": 629 |
| }, |
| { |
| "epoch": 1.1033274956217163, |
| "grad_norm": 0.6944164438470994, |
| "learning_rate": 4.192026141141054e-06, |
| "loss": 0.0726, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.1050788091068302, |
| "grad_norm": 0.6933343999073917, |
| "learning_rate": 4.178455207988504e-06, |
| "loss": 0.103, |
| "step": 631 |
| }, |
| { |
| "epoch": 1.106830122591944, |
| "grad_norm": 0.9114549106270846, |
| "learning_rate": 4.164890492088527e-06, |
| "loss": 0.0816, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.1085814360770578, |
| "grad_norm": 0.8832236692461997, |
| "learning_rate": 4.151332096095613e-06, |
| "loss": 0.0716, |
| "step": 633 |
| }, |
| { |
| "epoch": 1.1103327495621715, |
| "grad_norm": 0.6560477009246709, |
| "learning_rate": 4.13778012261643e-06, |
| "loss": 0.0495, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.1120840630472855, |
| "grad_norm": 0.7697309148882717, |
| "learning_rate": 4.124234674209038e-06, |
| "loss": 0.0784, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.1138353765323994, |
| "grad_norm": 0.8584207785015194, |
| "learning_rate": 4.110695853382123e-06, |
| "loss": 0.0838, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.115586690017513, |
| "grad_norm": 0.8178593101603066, |
| "learning_rate": 4.09716376259421e-06, |
| "loss": 0.0885, |
| "step": 637 |
| }, |
| { |
| "epoch": 1.117338003502627, |
| "grad_norm": 0.7493368271272891, |
| "learning_rate": 4.083638504252891e-06, |
| "loss": 0.0755, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.1190893169877407, |
| "grad_norm": 0.8921855827679555, |
| "learning_rate": 4.070120180714059e-06, |
| "loss": 0.1016, |
| "step": 639 |
| }, |
| { |
| "epoch": 1.1208406304728546, |
| "grad_norm": 0.6566522468773398, |
| "learning_rate": 4.056608894281118e-06, |
| "loss": 0.0621, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.1225919439579686, |
| "grad_norm": 1.0368825738027343, |
| "learning_rate": 4.043104747204222e-06, |
| "loss": 0.0964, |
| "step": 641 |
| }, |
| { |
| "epoch": 1.1243432574430823, |
| "grad_norm": 0.7545101954718236, |
| "learning_rate": 4.029607841679496e-06, |
| "loss": 0.0743, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.1260945709281962, |
| "grad_norm": 0.7795780796724158, |
| "learning_rate": 4.016118279848259e-06, |
| "loss": 0.0818, |
| "step": 643 |
| }, |
| { |
| "epoch": 1.12784588441331, |
| "grad_norm": 0.8102419707778211, |
| "learning_rate": 4.002636163796259e-06, |
| "loss": 0.0601, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.1295971978984238, |
| "grad_norm": 0.8912319933395433, |
| "learning_rate": 3.989161595552891e-06, |
| "loss": 0.1056, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.1313485113835378, |
| "grad_norm": 0.9227762274712196, |
| "learning_rate": 3.975694677090436e-06, |
| "loss": 0.0946, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.1330998248686515, |
| "grad_norm": 1.0277331150492526, |
| "learning_rate": 3.9622355103232805e-06, |
| "loss": 0.0943, |
| "step": 647 |
| }, |
| { |
| "epoch": 1.1348511383537654, |
| "grad_norm": 0.7898037783030375, |
| "learning_rate": 3.948784197107146e-06, |
| "loss": 0.0724, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.136602451838879, |
| "grad_norm": 0.8906513390558273, |
| "learning_rate": 3.935340839238325e-06, |
| "loss": 0.0978, |
| "step": 649 |
| }, |
| { |
| "epoch": 1.138353765323993, |
| "grad_norm": 0.8134210787173696, |
| "learning_rate": 3.9219055384529e-06, |
| "loss": 0.0743, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.140105078809107, |
| "grad_norm": 0.7115092884012872, |
| "learning_rate": 3.9084783964259855e-06, |
| "loss": 0.0492, |
| "step": 651 |
| }, |
| { |
| "epoch": 1.1418563922942206, |
| "grad_norm": 0.7227199245003441, |
| "learning_rate": 3.895059514770947e-06, |
| "loss": 0.089, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.1436077057793346, |
| "grad_norm": 0.8212458686585175, |
| "learning_rate": 3.88164899503864e-06, |
| "loss": 0.0873, |
| "step": 653 |
| }, |
| { |
| "epoch": 1.1453590192644483, |
| "grad_norm": 0.6189120343911374, |
| "learning_rate": 3.868246938716643e-06, |
| "loss": 0.0519, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.1471103327495622, |
| "grad_norm": 0.9135905892477679, |
| "learning_rate": 3.854853447228475e-06, |
| "loss": 0.0815, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.1488616462346761, |
| "grad_norm": 0.9650411991425408, |
| "learning_rate": 3.841468621932851e-06, |
| "loss": 0.0864, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.1506129597197898, |
| "grad_norm": 0.8715517114024203, |
| "learning_rate": 3.828092564122893e-06, |
| "loss": 0.0808, |
| "step": 657 |
| }, |
| { |
| "epoch": 1.1523642732049038, |
| "grad_norm": 0.7812734597619906, |
| "learning_rate": 3.814725375025376e-06, |
| "loss": 0.0681, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.1541155866900175, |
| "grad_norm": 0.9464561590385424, |
| "learning_rate": 3.801367155799959e-06, |
| "loss": 0.0967, |
| "step": 659 |
| }, |
| { |
| "epoch": 1.1558669001751314, |
| "grad_norm": 0.8197419560752517, |
| "learning_rate": 3.788018007538419e-06, |
| "loss": 0.0876, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.157618213660245, |
| "grad_norm": 0.7844620890716654, |
| "learning_rate": 3.774678031263887e-06, |
| "loss": 0.0949, |
| "step": 661 |
| }, |
| { |
| "epoch": 1.159369527145359, |
| "grad_norm": 0.6684997290021009, |
| "learning_rate": 3.7613473279300804e-06, |
| "loss": 0.0659, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.161120840630473, |
| "grad_norm": 0.6607818899113638, |
| "learning_rate": 3.7480259984205426e-06, |
| "loss": 0.0797, |
| "step": 663 |
| }, |
| { |
| "epoch": 1.1628721541155866, |
| "grad_norm": 0.8000383670556351, |
| "learning_rate": 3.734714143547879e-06, |
| "loss": 0.0982, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.1646234676007006, |
| "grad_norm": 0.7750030818236461, |
| "learning_rate": 3.7214118640529894e-06, |
| "loss": 0.0755, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.1663747810858143, |
| "grad_norm": 0.7718230303441634, |
| "learning_rate": 3.708119260604317e-06, |
| "loss": 0.0775, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.1681260945709282, |
| "grad_norm": 0.7606884085579109, |
| "learning_rate": 3.694836433797071e-06, |
| "loss": 0.0652, |
| "step": 667 |
| }, |
| { |
| "epoch": 1.1698774080560421, |
| "grad_norm": 0.9648178738887017, |
| "learning_rate": 3.681563484152477e-06, |
| "loss": 0.0892, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.1716287215411558, |
| "grad_norm": 1.0030218510210618, |
| "learning_rate": 3.668300512117014e-06, |
| "loss": 0.0996, |
| "step": 669 |
| }, |
| { |
| "epoch": 1.1733800350262698, |
| "grad_norm": 0.602908672449069, |
| "learning_rate": 3.655047618061648e-06, |
| "loss": 0.0583, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.1751313485113835, |
| "grad_norm": 0.9365604274710315, |
| "learning_rate": 3.6418049022810843e-06, |
| "loss": 0.0884, |
| "step": 671 |
| }, |
| { |
| "epoch": 1.1768826619964974, |
| "grad_norm": 0.9882368559103631, |
| "learning_rate": 3.6285724649929944e-06, |
| "loss": 0.1015, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.178633975481611, |
| "grad_norm": 0.965569447935582, |
| "learning_rate": 3.615350406337269e-06, |
| "loss": 0.097, |
| "step": 673 |
| }, |
| { |
| "epoch": 1.180385288966725, |
| "grad_norm": 1.0268385342532949, |
| "learning_rate": 3.6021388263752566e-06, |
| "loss": 0.1107, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.182136602451839, |
| "grad_norm": 0.9192005858295202, |
| "learning_rate": 3.588937825088999e-06, |
| "loss": 0.1, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.1838879159369526, |
| "grad_norm": 0.9617400889226272, |
| "learning_rate": 3.5757475023804907e-06, |
| "loss": 0.0692, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.1856392294220666, |
| "grad_norm": 0.8649057821650563, |
| "learning_rate": 3.562567958070905e-06, |
| "loss": 0.1033, |
| "step": 677 |
| }, |
| { |
| "epoch": 1.1873905429071803, |
| "grad_norm": 1.0420162692258215, |
| "learning_rate": 3.549399291899851e-06, |
| "loss": 0.099, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.1891418563922942, |
| "grad_norm": 0.5439954275486397, |
| "learning_rate": 3.536241603524616e-06, |
| "loss": 0.0514, |
| "step": 679 |
| }, |
| { |
| "epoch": 1.1908931698774081, |
| "grad_norm": 0.8221945556326056, |
| "learning_rate": 3.5230949925194034e-06, |
| "loss": 0.0841, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.1926444833625218, |
| "grad_norm": 1.0298523052786546, |
| "learning_rate": 3.5099595583745947e-06, |
| "loss": 0.102, |
| "step": 681 |
| }, |
| { |
| "epoch": 1.1943957968476357, |
| "grad_norm": 0.9729770963676866, |
| "learning_rate": 3.4968354004959804e-06, |
| "loss": 0.0959, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.1961471103327495, |
| "grad_norm": 0.8687339216607315, |
| "learning_rate": 3.4837226182040184e-06, |
| "loss": 0.0723, |
| "step": 683 |
| }, |
| { |
| "epoch": 1.1978984238178634, |
| "grad_norm": 1.1187226087659867, |
| "learning_rate": 3.470621310733078e-06, |
| "loss": 0.1072, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.1996497373029773, |
| "grad_norm": 0.8518811353036616, |
| "learning_rate": 3.4575315772306894e-06, |
| "loss": 0.1147, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.201401050788091, |
| "grad_norm": 1.0529027501197052, |
| "learning_rate": 3.444453516756796e-06, |
| "loss": 0.1036, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.203152364273205, |
| "grad_norm": 1.0032377659240923, |
| "learning_rate": 3.4313872282829998e-06, |
| "loss": 0.1128, |
| "step": 687 |
| }, |
| { |
| "epoch": 1.2049036777583186, |
| "grad_norm": 1.0018061327702659, |
| "learning_rate": 3.4183328106918177e-06, |
| "loss": 0.092, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.2066549912434326, |
| "grad_norm": 0.8566035601149397, |
| "learning_rate": 3.4052903627759264e-06, |
| "loss": 0.0936, |
| "step": 689 |
| }, |
| { |
| "epoch": 1.2084063047285465, |
| "grad_norm": 0.9125746690092138, |
| "learning_rate": 3.3922599832374224e-06, |
| "loss": 0.0788, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.2101576182136602, |
| "grad_norm": 1.0426073296986158, |
| "learning_rate": 3.379241770687074e-06, |
| "loss": 0.0799, |
| "step": 691 |
| }, |
| { |
| "epoch": 1.2119089316987741, |
| "grad_norm": 1.06100141802292, |
| "learning_rate": 3.3662358236435664e-06, |
| "loss": 0.1105, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.2136602451838878, |
| "grad_norm": 0.9390043654644253, |
| "learning_rate": 3.353242240532769e-06, |
| "loss": 0.1165, |
| "step": 693 |
| }, |
| { |
| "epoch": 1.2154115586690017, |
| "grad_norm": 0.8503852226528796, |
| "learning_rate": 3.3402611196869764e-06, |
| "loss": 0.1161, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.2171628721541157, |
| "grad_norm": 0.9078220049525988, |
| "learning_rate": 3.327292559344178e-06, |
| "loss": 0.0681, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.2189141856392294, |
| "grad_norm": 0.7127386687186886, |
| "learning_rate": 3.314336657647308e-06, |
| "loss": 0.0752, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.2206654991243433, |
| "grad_norm": 0.9358408817071951, |
| "learning_rate": 3.3013935126434994e-06, |
| "loss": 0.1019, |
| "step": 697 |
| }, |
| { |
| "epoch": 1.222416812609457, |
| "grad_norm": 0.7742420094838459, |
| "learning_rate": 3.288463222283349e-06, |
| "loss": 0.0931, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.224168126094571, |
| "grad_norm": 0.941039147307977, |
| "learning_rate": 3.2755458844201692e-06, |
| "loss": 0.094, |
| "step": 699 |
| }, |
| { |
| "epoch": 1.2259194395796849, |
| "grad_norm": 0.8228135353803155, |
| "learning_rate": 3.262641596809254e-06, |
| "loss": 0.0752, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.2276707530647986, |
| "grad_norm": 1.0260217425221851, |
| "learning_rate": 3.249750457107138e-06, |
| "loss": 0.1434, |
| "step": 701 |
| }, |
| { |
| "epoch": 1.2294220665499125, |
| "grad_norm": 0.8764995569070175, |
| "learning_rate": 3.2368725628708507e-06, |
| "loss": 0.0942, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.2311733800350262, |
| "grad_norm": 0.8810808130600745, |
| "learning_rate": 3.224008011557186e-06, |
| "loss": 0.0773, |
| "step": 703 |
| }, |
| { |
| "epoch": 1.2329246935201401, |
| "grad_norm": 0.8339945067432337, |
| "learning_rate": 3.211156900521961e-06, |
| "loss": 0.0577, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.234676007005254, |
| "grad_norm": 0.9539133650050169, |
| "learning_rate": 3.1983193270192787e-06, |
| "loss": 0.0854, |
| "step": 705 |
| }, |
| { |
| "epoch": 1.2364273204903677, |
| "grad_norm": 0.7988973560578649, |
| "learning_rate": 3.185495388200799e-06, |
| "loss": 0.0718, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.2381786339754817, |
| "grad_norm": 1.074214838443568, |
| "learning_rate": 3.1726851811149907e-06, |
| "loss": 0.0927, |
| "step": 707 |
| }, |
| { |
| "epoch": 1.2399299474605954, |
| "grad_norm": 0.8023394631186075, |
| "learning_rate": 3.159888802706408e-06, |
| "loss": 0.076, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.2416812609457093, |
| "grad_norm": 0.6592103260258445, |
| "learning_rate": 3.147106349814951e-06, |
| "loss": 0.0602, |
| "step": 709 |
| }, |
| { |
| "epoch": 1.2434325744308232, |
| "grad_norm": 1.1114735422528423, |
| "learning_rate": 3.1343379191751366e-06, |
| "loss": 0.0904, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.245183887915937, |
| "grad_norm": 0.8722207507134669, |
| "learning_rate": 3.1215836074153666e-06, |
| "loss": 0.0553, |
| "step": 711 |
| }, |
| { |
| "epoch": 1.2469352014010509, |
| "grad_norm": 0.9701640956470772, |
| "learning_rate": 3.1088435110571884e-06, |
| "loss": 0.0951, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.2486865148861646, |
| "grad_norm": 0.7785105550035741, |
| "learning_rate": 3.0961177265145776e-06, |
| "loss": 0.0744, |
| "step": 713 |
| }, |
| { |
| "epoch": 1.2504378283712785, |
| "grad_norm": 1.1865316263907835, |
| "learning_rate": 3.0834063500931947e-06, |
| "loss": 0.1155, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.2521891418563924, |
| "grad_norm": 0.670146861520249, |
| "learning_rate": 3.0707094779896695e-06, |
| "loss": 0.0737, |
| "step": 715 |
| }, |
| { |
| "epoch": 1.253940455341506, |
| "grad_norm": 1.0183130513166565, |
| "learning_rate": 3.0580272062908605e-06, |
| "loss": 0.1009, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.25569176882662, |
| "grad_norm": 1.3895554101906575, |
| "learning_rate": 3.0453596309731396e-06, |
| "loss": 0.1295, |
| "step": 717 |
| }, |
| { |
| "epoch": 1.2574430823117337, |
| "grad_norm": 0.8401700996571929, |
| "learning_rate": 3.032706847901658e-06, |
| "loss": 0.1052, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.2591943957968477, |
| "grad_norm": 0.9557568744741772, |
| "learning_rate": 3.020068952829619e-06, |
| "loss": 0.1099, |
| "step": 719 |
| }, |
| { |
| "epoch": 1.2609457092819616, |
| "grad_norm": 0.7781143616664629, |
| "learning_rate": 3.0074460413975636e-06, |
| "loss": 0.0603, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.2626970227670753, |
| "grad_norm": 0.8080335461170193, |
| "learning_rate": 2.9948382091326328e-06, |
| "loss": 0.0971, |
| "step": 721 |
| }, |
| { |
| "epoch": 1.2644483362521892, |
| "grad_norm": 0.8725117432766293, |
| "learning_rate": 2.98224555144786e-06, |
| "loss": 0.0565, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.266199649737303, |
| "grad_norm": 1.0281518070875828, |
| "learning_rate": 2.9696681636414372e-06, |
| "loss": 0.1, |
| "step": 723 |
| }, |
| { |
| "epoch": 1.2679509632224168, |
| "grad_norm": 0.9107780574366331, |
| "learning_rate": 2.9571061408959943e-06, |
| "loss": 0.0812, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.2697022767075308, |
| "grad_norm": 0.8277139085800029, |
| "learning_rate": 2.944559578277889e-06, |
| "loss": 0.0669, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.2714535901926445, |
| "grad_norm": 0.8815637667439311, |
| "learning_rate": 2.932028570736474e-06, |
| "loss": 0.097, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.2732049036777582, |
| "grad_norm": 1.1364269200108077, |
| "learning_rate": 2.919513213103391e-06, |
| "loss": 0.0706, |
| "step": 727 |
| }, |
| { |
| "epoch": 1.274956217162872, |
| "grad_norm": 0.7498765483852221, |
| "learning_rate": 2.9070136000918426e-06, |
| "loss": 0.0687, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.276707530647986, |
| "grad_norm": 0.7962376754602092, |
| "learning_rate": 2.89452982629588e-06, |
| "loss": 0.0609, |
| "step": 729 |
| }, |
| { |
| "epoch": 1.2784588441331, |
| "grad_norm": 1.0565112515662811, |
| "learning_rate": 2.8820619861896908e-06, |
| "loss": 0.089, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.2802101576182137, |
| "grad_norm": 1.0984844227065096, |
| "learning_rate": 2.8696101741268765e-06, |
| "loss": 0.0609, |
| "step": 731 |
| }, |
| { |
| "epoch": 1.2819614711033274, |
| "grad_norm": 1.3486910096875888, |
| "learning_rate": 2.8571744843397412e-06, |
| "loss": 0.1295, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.2837127845884413, |
| "grad_norm": 0.7596179516412704, |
| "learning_rate": 2.844755010938586e-06, |
| "loss": 0.0552, |
| "step": 733 |
| }, |
| { |
| "epoch": 1.2854640980735552, |
| "grad_norm": 0.8293822539041598, |
| "learning_rate": 2.8323518479109824e-06, |
| "loss": 0.0673, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.287215411558669, |
| "grad_norm": 1.0021410590514732, |
| "learning_rate": 2.819965089121076e-06, |
| "loss": 0.079, |
| "step": 735 |
| }, |
| { |
| "epoch": 1.2889667250437828, |
| "grad_norm": 0.98205909089748, |
| "learning_rate": 2.8075948283088637e-06, |
| "loss": 0.0956, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.2907180385288965, |
| "grad_norm": 0.9484379433282005, |
| "learning_rate": 2.7952411590894914e-06, |
| "loss": 0.0836, |
| "step": 737 |
| }, |
| { |
| "epoch": 1.2924693520140105, |
| "grad_norm": 0.8548057872579898, |
| "learning_rate": 2.7829041749525455e-06, |
| "loss": 0.0698, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.2942206654991244, |
| "grad_norm": 0.6958930931575743, |
| "learning_rate": 2.77058396926134e-06, |
| "loss": 0.0472, |
| "step": 739 |
| }, |
| { |
| "epoch": 1.295971978984238, |
| "grad_norm": 0.8590285326110315, |
| "learning_rate": 2.7582806352522194e-06, |
| "loss": 0.1035, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.297723292469352, |
| "grad_norm": 0.7506949834567328, |
| "learning_rate": 2.7459942660338434e-06, |
| "loss": 0.0844, |
| "step": 741 |
| }, |
| { |
| "epoch": 1.2994746059544657, |
| "grad_norm": 0.718144378394281, |
| "learning_rate": 2.733724954586483e-06, |
| "loss": 0.072, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.3012259194395797, |
| "grad_norm": 0.9554449618063786, |
| "learning_rate": 2.7214727937613293e-06, |
| "loss": 0.0738, |
| "step": 743 |
| }, |
| { |
| "epoch": 1.3029772329246936, |
| "grad_norm": 1.281934191339505, |
| "learning_rate": 2.709237876279772e-06, |
| "loss": 0.0861, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.3047285464098073, |
| "grad_norm": 0.8591603075335503, |
| "learning_rate": 2.6970202947327156e-06, |
| "loss": 0.0738, |
| "step": 745 |
| }, |
| { |
| "epoch": 1.3064798598949212, |
| "grad_norm": 1.077686551423765, |
| "learning_rate": 2.6848201415798646e-06, |
| "loss": 0.1006, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.308231173380035, |
| "grad_norm": 1.0140620923015204, |
| "learning_rate": 2.6726375091490313e-06, |
| "loss": 0.1179, |
| "step": 747 |
| }, |
| { |
| "epoch": 1.3099824868651488, |
| "grad_norm": 0.8465851580721724, |
| "learning_rate": 2.6604724896354338e-06, |
| "loss": 0.095, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.3117338003502628, |
| "grad_norm": 0.8292233955682955, |
| "learning_rate": 2.648325175101004e-06, |
| "loss": 0.078, |
| "step": 749 |
| }, |
| { |
| "epoch": 1.3134851138353765, |
| "grad_norm": 0.9961150572832315, |
| "learning_rate": 2.6361956574736867e-06, |
| "loss": 0.1037, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.3152364273204904, |
| "grad_norm": 1.0125252057907417, |
| "learning_rate": 2.624084028546739e-06, |
| "loss": 0.1302, |
| "step": 751 |
| }, |
| { |
| "epoch": 1.316987740805604, |
| "grad_norm": 1.4492932570805932, |
| "learning_rate": 2.6119903799780445e-06, |
| "loss": 0.1018, |
| "step": 752 |
| }, |
| { |
| "epoch": 1.318739054290718, |
| "grad_norm": 1.0313545636415973, |
| "learning_rate": 2.5999148032894116e-06, |
| "loss": 0.1301, |
| "step": 753 |
| }, |
| { |
| "epoch": 1.320490367775832, |
| "grad_norm": 0.7640005278295612, |
| "learning_rate": 2.587857389865891e-06, |
| "loss": 0.0705, |
| "step": 754 |
| }, |
| { |
| "epoch": 1.3222416812609457, |
| "grad_norm": 0.9241800146530195, |
| "learning_rate": 2.5758182309550773e-06, |
| "loss": 0.1024, |
| "step": 755 |
| }, |
| { |
| "epoch": 1.3239929947460596, |
| "grad_norm": 0.8476677958931723, |
| "learning_rate": 2.5637974176664156e-06, |
| "loss": 0.075, |
| "step": 756 |
| }, |
| { |
| "epoch": 1.3257443082311733, |
| "grad_norm": 0.9539049936089635, |
| "learning_rate": 2.5517950409705173e-06, |
| "loss": 0.0732, |
| "step": 757 |
| }, |
| { |
| "epoch": 1.3274956217162872, |
| "grad_norm": 0.7787415845648707, |
| "learning_rate": 2.539811191698469e-06, |
| "loss": 0.068, |
| "step": 758 |
| }, |
| { |
| "epoch": 1.3292469352014011, |
| "grad_norm": 0.6740960966163063, |
| "learning_rate": 2.52784596054115e-06, |
| "loss": 0.0807, |
| "step": 759 |
| }, |
| { |
| "epoch": 1.3309982486865148, |
| "grad_norm": 0.9272432887557794, |
| "learning_rate": 2.5158994380485403e-06, |
| "loss": 0.1073, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.3327495621716288, |
| "grad_norm": 0.805162044866896, |
| "learning_rate": 2.5039717146290365e-06, |
| "loss": 0.1363, |
| "step": 761 |
| }, |
| { |
| "epoch": 1.3345008756567425, |
| "grad_norm": 0.9017129027314272, |
| "learning_rate": 2.4920628805487684e-06, |
| "loss": 0.093, |
| "step": 762 |
| }, |
| { |
| "epoch": 1.3362521891418564, |
| "grad_norm": 0.9079872727729905, |
| "learning_rate": 2.4801730259309136e-06, |
| "loss": 0.0808, |
| "step": 763 |
| }, |
| { |
| "epoch": 1.3380035026269703, |
| "grad_norm": 0.7596071384023176, |
| "learning_rate": 2.468302240755023e-06, |
| "loss": 0.0811, |
| "step": 764 |
| }, |
| { |
| "epoch": 1.339754816112084, |
| "grad_norm": 0.8763250650248721, |
| "learning_rate": 2.456450614856333e-06, |
| "loss": 0.0887, |
| "step": 765 |
| }, |
| { |
| "epoch": 1.341506129597198, |
| "grad_norm": 0.9923301577394401, |
| "learning_rate": 2.4446182379250843e-06, |
| "loss": 0.0893, |
| "step": 766 |
| }, |
| { |
| "epoch": 1.3432574430823117, |
| "grad_norm": 0.8633434639363498, |
| "learning_rate": 2.4328051995058482e-06, |
| "loss": 0.088, |
| "step": 767 |
| }, |
| { |
| "epoch": 1.3450087565674256, |
| "grad_norm": 1.0475821217828167, |
| "learning_rate": 2.4210115889968446e-06, |
| "loss": 0.0924, |
| "step": 768 |
| }, |
| { |
| "epoch": 1.3467600700525395, |
| "grad_norm": 0.9107712943754082, |
| "learning_rate": 2.409237495649271e-06, |
| "loss": 0.0728, |
| "step": 769 |
| }, |
| { |
| "epoch": 1.3485113835376532, |
| "grad_norm": 1.5257662309673086, |
| "learning_rate": 2.397483008566624e-06, |
| "loss": 0.1125, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.3502626970227671, |
| "grad_norm": 0.876528919503985, |
| "learning_rate": 2.3857482167040215e-06, |
| "loss": 0.0974, |
| "step": 771 |
| }, |
| { |
| "epoch": 1.3520140105078808, |
| "grad_norm": 0.924196021469823, |
| "learning_rate": 2.374033208867534e-06, |
| "loss": 0.0915, |
| "step": 772 |
| }, |
| { |
| "epoch": 1.3537653239929948, |
| "grad_norm": 0.792737877765985, |
| "learning_rate": 2.3623380737135094e-06, |
| "loss": 0.0678, |
| "step": 773 |
| }, |
| { |
| "epoch": 1.3555166374781087, |
| "grad_norm": 0.7778836639012523, |
| "learning_rate": 2.3506628997479085e-06, |
| "loss": 0.0653, |
| "step": 774 |
| }, |
| { |
| "epoch": 1.3572679509632224, |
| "grad_norm": 0.8527989490917307, |
| "learning_rate": 2.339007775325629e-06, |
| "loss": 0.0833, |
| "step": 775 |
| }, |
| { |
| "epoch": 1.3590192644483363, |
| "grad_norm": 0.7022012295601906, |
| "learning_rate": 2.3273727886498372e-06, |
| "loss": 0.0593, |
| "step": 776 |
| }, |
| { |
| "epoch": 1.36077057793345, |
| "grad_norm": 0.957527967610434, |
| "learning_rate": 2.3157580277713004e-06, |
| "loss": 0.0669, |
| "step": 777 |
| }, |
| { |
| "epoch": 1.362521891418564, |
| "grad_norm": 1.5092316384911828, |
| "learning_rate": 2.304163580587724e-06, |
| "loss": 0.1074, |
| "step": 778 |
| }, |
| { |
| "epoch": 1.3642732049036779, |
| "grad_norm": 0.899000777013253, |
| "learning_rate": 2.2925895348430856e-06, |
| "loss": 0.0835, |
| "step": 779 |
| }, |
| { |
| "epoch": 1.3660245183887916, |
| "grad_norm": 0.790034586719747, |
| "learning_rate": 2.2810359781269657e-06, |
| "loss": 0.0719, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.3677758318739055, |
| "grad_norm": 0.7907785753390943, |
| "learning_rate": 2.269502997873895e-06, |
| "loss": 0.0781, |
| "step": 781 |
| }, |
| { |
| "epoch": 1.3695271453590192, |
| "grad_norm": 0.6926955937004445, |
| "learning_rate": 2.2579906813626807e-06, |
| "loss": 0.0728, |
| "step": 782 |
| }, |
| { |
| "epoch": 1.3712784588441331, |
| "grad_norm": 0.6166696427484202, |
| "learning_rate": 2.246499115715751e-06, |
| "loss": 0.0644, |
| "step": 783 |
| }, |
| { |
| "epoch": 1.373029772329247, |
| "grad_norm": 0.7255433228039453, |
| "learning_rate": 2.235028387898504e-06, |
| "loss": 0.0979, |
| "step": 784 |
| }, |
| { |
| "epoch": 1.3747810858143608, |
| "grad_norm": 0.7617740643442561, |
| "learning_rate": 2.2235785847186338e-06, |
| "loss": 0.0924, |
| "step": 785 |
| }, |
| { |
| "epoch": 1.3765323992994747, |
| "grad_norm": 1.0217536171541641, |
| "learning_rate": 2.212149792825489e-06, |
| "loss": 0.0979, |
| "step": 786 |
| }, |
| { |
| "epoch": 1.3782837127845884, |
| "grad_norm": 0.8732538774678509, |
| "learning_rate": 2.2007420987094036e-06, |
| "loss": 0.0734, |
| "step": 787 |
| }, |
| { |
| "epoch": 1.3800350262697023, |
| "grad_norm": 1.0019292167266876, |
| "learning_rate": 2.189355588701051e-06, |
| "loss": 0.1069, |
| "step": 788 |
| }, |
| { |
| "epoch": 1.3817863397548162, |
| "grad_norm": 0.8315824634330226, |
| "learning_rate": 2.177990348970792e-06, |
| "loss": 0.0909, |
| "step": 789 |
| }, |
| { |
| "epoch": 1.38353765323993, |
| "grad_norm": 0.647602160309111, |
| "learning_rate": 2.1666464655280133e-06, |
| "loss": 0.0812, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.3852889667250436, |
| "grad_norm": 0.9640769615135378, |
| "learning_rate": 2.1553240242204876e-06, |
| "loss": 0.0873, |
| "step": 791 |
| }, |
| { |
| "epoch": 1.3870402802101576, |
| "grad_norm": 0.9202817145346947, |
| "learning_rate": 2.1440231107337147e-06, |
| "loss": 0.0792, |
| "step": 792 |
| }, |
| { |
| "epoch": 1.3887915936952715, |
| "grad_norm": 0.912675153291401, |
| "learning_rate": 2.1327438105902763e-06, |
| "loss": 0.0773, |
| "step": 793 |
| }, |
| { |
| "epoch": 1.3905429071803854, |
| "grad_norm": 0.9415049365667928, |
| "learning_rate": 2.1214862091491966e-06, |
| "loss": 0.1135, |
| "step": 794 |
| }, |
| { |
| "epoch": 1.3922942206654991, |
| "grad_norm": 0.9970178640835594, |
| "learning_rate": 2.1102503916052797e-06, |
| "loss": 0.0847, |
| "step": 795 |
| }, |
| { |
| "epoch": 1.3940455341506128, |
| "grad_norm": 0.9832327330756709, |
| "learning_rate": 2.0990364429884828e-06, |
| "loss": 0.1235, |
| "step": 796 |
| }, |
| { |
| "epoch": 1.3957968476357268, |
| "grad_norm": 0.8721793805349013, |
| "learning_rate": 2.0878444481632597e-06, |
| "loss": 0.1004, |
| "step": 797 |
| }, |
| { |
| "epoch": 1.3975481611208407, |
| "grad_norm": 0.7747485377067997, |
| "learning_rate": 2.076674491827922e-06, |
| "loss": 0.069, |
| "step": 798 |
| }, |
| { |
| "epoch": 1.3992994746059544, |
| "grad_norm": 0.8456173036014264, |
| "learning_rate": 2.0655266585140045e-06, |
| "loss": 0.0754, |
| "step": 799 |
| }, |
| { |
| "epoch": 1.4010507880910683, |
| "grad_norm": 0.9391798871992534, |
| "learning_rate": 2.0544010325856146e-06, |
| "loss": 0.0969, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.402802101576182, |
| "grad_norm": 0.8875537072650175, |
| "learning_rate": 2.043297698238805e-06, |
| "loss": 0.0678, |
| "step": 801 |
| }, |
| { |
| "epoch": 1.404553415061296, |
| "grad_norm": 0.7833633854106007, |
| "learning_rate": 2.0322167395009286e-06, |
| "loss": 0.0877, |
| "step": 802 |
| }, |
| { |
| "epoch": 1.4063047285464099, |
| "grad_norm": 0.9708805413159364, |
| "learning_rate": 2.0211582402300007e-06, |
| "loss": 0.0937, |
| "step": 803 |
| }, |
| { |
| "epoch": 1.4080560420315236, |
| "grad_norm": 0.8290033085639599, |
| "learning_rate": 2.0101222841140775e-06, |
| "loss": 0.0722, |
| "step": 804 |
| }, |
| { |
| "epoch": 1.4098073555166375, |
| "grad_norm": 0.7734317269111782, |
| "learning_rate": 1.9991089546706067e-06, |
| "loss": 0.0788, |
| "step": 805 |
| }, |
| { |
| "epoch": 1.4115586690017512, |
| "grad_norm": 0.8912988277705161, |
| "learning_rate": 1.9881183352458083e-06, |
| "loss": 0.0923, |
| "step": 806 |
| }, |
| { |
| "epoch": 1.4133099824868651, |
| "grad_norm": 0.8930959140982987, |
| "learning_rate": 1.9771505090140343e-06, |
| "loss": 0.0858, |
| "step": 807 |
| }, |
| { |
| "epoch": 1.415061295971979, |
| "grad_norm": 1.0486079636516412, |
| "learning_rate": 1.9662055589771427e-06, |
| "loss": 0.0848, |
| "step": 808 |
| }, |
| { |
| "epoch": 1.4168126094570928, |
| "grad_norm": 0.869268490663677, |
| "learning_rate": 1.955283567963876e-06, |
| "loss": 0.1326, |
| "step": 809 |
| }, |
| { |
| "epoch": 1.4185639229422067, |
| "grad_norm": 0.6923293806693147, |
| "learning_rate": 1.9443846186292204e-06, |
| "loss": 0.0594, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.4203152364273204, |
| "grad_norm": 0.7354927224586634, |
| "learning_rate": 1.9335087934537956e-06, |
| "loss": 0.0806, |
| "step": 811 |
| }, |
| { |
| "epoch": 1.4220665499124343, |
| "grad_norm": 0.8259624368436999, |
| "learning_rate": 1.9226561747432188e-06, |
| "loss": 0.0857, |
| "step": 812 |
| }, |
| { |
| "epoch": 1.4238178633975482, |
| "grad_norm": 0.8711192744207589, |
| "learning_rate": 1.911826844627485e-06, |
| "loss": 0.0752, |
| "step": 813 |
| }, |
| { |
| "epoch": 1.425569176882662, |
| "grad_norm": 2.6069679207164667, |
| "learning_rate": 1.901020885060353e-06, |
| "loss": 0.0866, |
| "step": 814 |
| }, |
| { |
| "epoch": 1.4273204903677759, |
| "grad_norm": 0.8510608016527956, |
| "learning_rate": 1.8902383778187106e-06, |
| "loss": 0.1021, |
| "step": 815 |
| }, |
| { |
| "epoch": 1.4290718038528896, |
| "grad_norm": 0.9912516765235937, |
| "learning_rate": 1.8794794045019727e-06, |
| "loss": 0.1037, |
| "step": 816 |
| }, |
| { |
| "epoch": 1.4308231173380035, |
| "grad_norm": 1.4280563242119932, |
| "learning_rate": 1.8687440465314493e-06, |
| "loss": 0.125, |
| "step": 817 |
| }, |
| { |
| "epoch": 1.4325744308231174, |
| "grad_norm": 0.8887782023449038, |
| "learning_rate": 1.858032385149735e-06, |
| "loss": 0.0961, |
| "step": 818 |
| }, |
| { |
| "epoch": 1.4343257443082311, |
| "grad_norm": 0.8092352027580832, |
| "learning_rate": 1.8473445014200992e-06, |
| "loss": 0.0784, |
| "step": 819 |
| }, |
| { |
| "epoch": 1.436077057793345, |
| "grad_norm": 0.8508624184842186, |
| "learning_rate": 1.8366804762258612e-06, |
| "loss": 0.0993, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.4378283712784588, |
| "grad_norm": 0.8476088885858866, |
| "learning_rate": 1.826040390269792e-06, |
| "loss": 0.097, |
| "step": 821 |
| }, |
| { |
| "epoch": 1.4395796847635727, |
| "grad_norm": 0.7858581848448734, |
| "learning_rate": 1.8154243240734904e-06, |
| "loss": 0.0545, |
| "step": 822 |
| }, |
| { |
| "epoch": 1.4413309982486866, |
| "grad_norm": 0.7511491075823152, |
| "learning_rate": 1.8048323579767796e-06, |
| "loss": 0.0614, |
| "step": 823 |
| }, |
| { |
| "epoch": 1.4430823117338003, |
| "grad_norm": 0.8916221736098047, |
| "learning_rate": 1.7942645721371043e-06, |
| "loss": 0.0688, |
| "step": 824 |
| }, |
| { |
| "epoch": 1.4448336252189142, |
| "grad_norm": 1.0386802641836865, |
| "learning_rate": 1.7837210465289129e-06, |
| "loss": 0.1243, |
| "step": 825 |
| }, |
| { |
| "epoch": 1.446584938704028, |
| "grad_norm": 0.8600832619931907, |
| "learning_rate": 1.773201860943063e-06, |
| "loss": 0.0591, |
| "step": 826 |
| }, |
| { |
| "epoch": 1.4483362521891419, |
| "grad_norm": 0.85335509068009, |
| "learning_rate": 1.7627070949862095e-06, |
| "loss": 0.0897, |
| "step": 827 |
| }, |
| { |
| "epoch": 1.4500875656742558, |
| "grad_norm": 1.0290314674102257, |
| "learning_rate": 1.7522368280802048e-06, |
| "loss": 0.1101, |
| "step": 828 |
| }, |
| { |
| "epoch": 1.4518388791593695, |
| "grad_norm": 0.7360509013463974, |
| "learning_rate": 1.7417911394615033e-06, |
| "loss": 0.073, |
| "step": 829 |
| }, |
| { |
| "epoch": 1.4535901926444834, |
| "grad_norm": 1.0355631885341359, |
| "learning_rate": 1.7313701081805506e-06, |
| "loss": 0.1144, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.4553415061295971, |
| "grad_norm": 0.6596592368497637, |
| "learning_rate": 1.7209738131011977e-06, |
| "loss": 0.0815, |
| "step": 831 |
| }, |
| { |
| "epoch": 1.457092819614711, |
| "grad_norm": 0.8157916293097928, |
| "learning_rate": 1.7106023329000932e-06, |
| "loss": 0.0825, |
| "step": 832 |
| }, |
| { |
| "epoch": 1.458844133099825, |
| "grad_norm": 0.8170690188009203, |
| "learning_rate": 1.700255746066093e-06, |
| "loss": 0.0768, |
| "step": 833 |
| }, |
| { |
| "epoch": 1.4605954465849387, |
| "grad_norm": 0.7806828063640731, |
| "learning_rate": 1.6899341308996704e-06, |
| "loss": 0.0828, |
| "step": 834 |
| }, |
| { |
| "epoch": 1.4623467600700526, |
| "grad_norm": 0.7615625289107787, |
| "learning_rate": 1.6796375655123126e-06, |
| "loss": 0.0983, |
| "step": 835 |
| }, |
| { |
| "epoch": 1.4640980735551663, |
| "grad_norm": 1.1145859850353201, |
| "learning_rate": 1.6693661278259438e-06, |
| "loss": 0.1593, |
| "step": 836 |
| }, |
| { |
| "epoch": 1.4658493870402802, |
| "grad_norm": 0.8460031372441591, |
| "learning_rate": 1.659119895572322e-06, |
| "loss": 0.0713, |
| "step": 837 |
| }, |
| { |
| "epoch": 1.4676007005253942, |
| "grad_norm": 0.8666355376799286, |
| "learning_rate": 1.648898946292456e-06, |
| "loss": 0.0795, |
| "step": 838 |
| }, |
| { |
| "epoch": 1.4693520140105079, |
| "grad_norm": 1.098767971036189, |
| "learning_rate": 1.6387033573360244e-06, |
| "loss": 0.1291, |
| "step": 839 |
| }, |
| { |
| "epoch": 1.4711033274956218, |
| "grad_norm": 1.0895405573642913, |
| "learning_rate": 1.62853320586078e-06, |
| "loss": 0.1035, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.4728546409807355, |
| "grad_norm": 1.0610577874468181, |
| "learning_rate": 1.6183885688319755e-06, |
| "loss": 0.1761, |
| "step": 841 |
| }, |
| { |
| "epoch": 1.4746059544658494, |
| "grad_norm": 0.9087423490771874, |
| "learning_rate": 1.6082695230217721e-06, |
| "loss": 0.0903, |
| "step": 842 |
| }, |
| { |
| "epoch": 1.4763572679509633, |
| "grad_norm": 1.4022223726835983, |
| "learning_rate": 1.5981761450086647e-06, |
| "loss": 0.1407, |
| "step": 843 |
| }, |
| { |
| "epoch": 1.478108581436077, |
| "grad_norm": 0.8453730182740572, |
| "learning_rate": 1.588108511176899e-06, |
| "loss": 0.0801, |
| "step": 844 |
| }, |
| { |
| "epoch": 1.479859894921191, |
| "grad_norm": 0.7650759951583646, |
| "learning_rate": 1.5780666977158976e-06, |
| "loss": 0.0898, |
| "step": 845 |
| }, |
| { |
| "epoch": 1.4816112084063047, |
| "grad_norm": 1.0534592243091558, |
| "learning_rate": 1.5680507806196815e-06, |
| "loss": 0.1065, |
| "step": 846 |
| }, |
| { |
| "epoch": 1.4833625218914186, |
| "grad_norm": 0.7581070203659848, |
| "learning_rate": 1.558060835686291e-06, |
| "loss": 0.0768, |
| "step": 847 |
| }, |
| { |
| "epoch": 1.4851138353765325, |
| "grad_norm": 0.8228217306630333, |
| "learning_rate": 1.548096938517215e-06, |
| "loss": 0.0864, |
| "step": 848 |
| }, |
| { |
| "epoch": 1.4868651488616462, |
| "grad_norm": 0.6948138142476058, |
| "learning_rate": 1.5381591645168214e-06, |
| "loss": 0.0727, |
| "step": 849 |
| }, |
| { |
| "epoch": 1.4886164623467601, |
| "grad_norm": 1.1215312769377082, |
| "learning_rate": 1.5282475888917837e-06, |
| "loss": 0.1084, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.4903677758318739, |
| "grad_norm": 0.556612824562337, |
| "learning_rate": 1.5183622866505149e-06, |
| "loss": 0.0378, |
| "step": 851 |
| }, |
| { |
| "epoch": 1.4921190893169878, |
| "grad_norm": 0.8334893621906851, |
| "learning_rate": 1.5085033326025933e-06, |
| "loss": 0.1058, |
| "step": 852 |
| }, |
| { |
| "epoch": 1.4938704028021017, |
| "grad_norm": 0.8483067154096786, |
| "learning_rate": 1.4986708013582013e-06, |
| "loss": 0.0593, |
| "step": 853 |
| }, |
| { |
| "epoch": 1.4956217162872154, |
| "grad_norm": 0.7563089529959395, |
| "learning_rate": 1.4888647673275598e-06, |
| "loss": 0.0881, |
| "step": 854 |
| }, |
| { |
| "epoch": 1.4973730297723291, |
| "grad_norm": 0.9581064139110383, |
| "learning_rate": 1.4790853047203674e-06, |
| "loss": 0.1231, |
| "step": 855 |
| }, |
| { |
| "epoch": 1.499124343257443, |
| "grad_norm": 0.8646013332942565, |
| "learning_rate": 1.4693324875452369e-06, |
| "loss": 0.0962, |
| "step": 856 |
| }, |
| { |
| "epoch": 1.500875656742557, |
| "grad_norm": 0.8161715950753782, |
| "learning_rate": 1.4596063896091316e-06, |
| "loss": 0.0984, |
| "step": 857 |
| }, |
| { |
| "epoch": 1.5026269702276709, |
| "grad_norm": 0.85028135397844, |
| "learning_rate": 1.4499070845168112e-06, |
| "loss": 0.0998, |
| "step": 858 |
| }, |
| { |
| "epoch": 1.5043782837127846, |
| "grad_norm": 0.8492942866077009, |
| "learning_rate": 1.4402346456702737e-06, |
| "loss": 0.0802, |
| "step": 859 |
| }, |
| { |
| "epoch": 1.5061295971978983, |
| "grad_norm": 1.0442880468905495, |
| "learning_rate": 1.4305891462682004e-06, |
| "loss": 0.1154, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.5078809106830122, |
| "grad_norm": 0.8309778737447728, |
| "learning_rate": 1.420970659305404e-06, |
| "loss": 0.1184, |
| "step": 861 |
| }, |
| { |
| "epoch": 1.5096322241681261, |
| "grad_norm": 1.0968471389762762, |
| "learning_rate": 1.4113792575722684e-06, |
| "loss": 0.0877, |
| "step": 862 |
| }, |
| { |
| "epoch": 1.51138353765324, |
| "grad_norm": 0.7388443383068837, |
| "learning_rate": 1.4018150136542063e-06, |
| "loss": 0.0431, |
| "step": 863 |
| }, |
| { |
| "epoch": 1.5131348511383538, |
| "grad_norm": 0.978572938428472, |
| "learning_rate": 1.3922779999311032e-06, |
| "loss": 0.0662, |
| "step": 864 |
| }, |
| { |
| "epoch": 1.5148861646234675, |
| "grad_norm": 0.825879975581395, |
| "learning_rate": 1.3827682885767778e-06, |
| "loss": 0.0741, |
| "step": 865 |
| }, |
| { |
| "epoch": 1.5166374781085814, |
| "grad_norm": 0.8275889522903741, |
| "learning_rate": 1.3732859515584306e-06, |
| "loss": 0.0719, |
| "step": 866 |
| }, |
| { |
| "epoch": 1.5183887915936953, |
| "grad_norm": 0.8930447963765076, |
| "learning_rate": 1.363831060636096e-06, |
| "loss": 0.101, |
| "step": 867 |
| }, |
| { |
| "epoch": 1.5201401050788093, |
| "grad_norm": 1.0626174646711952, |
| "learning_rate": 1.3544036873621054e-06, |
| "loss": 0.1285, |
| "step": 868 |
| }, |
| { |
| "epoch": 1.521891418563923, |
| "grad_norm": 0.9900839577095674, |
| "learning_rate": 1.345003903080541e-06, |
| "loss": 0.072, |
| "step": 869 |
| }, |
| { |
| "epoch": 1.5236427320490367, |
| "grad_norm": 0.9536264805723799, |
| "learning_rate": 1.335631778926702e-06, |
| "loss": 0.1401, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.5253940455341506, |
| "grad_norm": 0.816870903519518, |
| "learning_rate": 1.3262873858265618e-06, |
| "loss": 0.0764, |
| "step": 871 |
| }, |
| { |
| "epoch": 1.5271453590192645, |
| "grad_norm": 1.1150610380119643, |
| "learning_rate": 1.316970794496229e-06, |
| "loss": 0.0694, |
| "step": 872 |
| }, |
| { |
| "epoch": 1.5288966725043784, |
| "grad_norm": 0.955595059805072, |
| "learning_rate": 1.3076820754414165e-06, |
| "loss": 0.0844, |
| "step": 873 |
| }, |
| { |
| "epoch": 1.5306479859894921, |
| "grad_norm": 0.8892461247283927, |
| "learning_rate": 1.2984212989569055e-06, |
| "loss": 0.0709, |
| "step": 874 |
| }, |
| { |
| "epoch": 1.5323992994746058, |
| "grad_norm": 1.0183197020080885, |
| "learning_rate": 1.2891885351260191e-06, |
| "loss": 0.0835, |
| "step": 875 |
| }, |
| { |
| "epoch": 1.5341506129597198, |
| "grad_norm": 0.7689697067022396, |
| "learning_rate": 1.2799838538200804e-06, |
| "loss": 0.0865, |
| "step": 876 |
| }, |
| { |
| "epoch": 1.5359019264448337, |
| "grad_norm": 0.9433827530845643, |
| "learning_rate": 1.270807324697898e-06, |
| "loss": 0.0831, |
| "step": 877 |
| }, |
| { |
| "epoch": 1.5376532399299476, |
| "grad_norm": 0.7106251655802119, |
| "learning_rate": 1.2616590172052268e-06, |
| "loss": 0.0772, |
| "step": 878 |
| }, |
| { |
| "epoch": 1.5394045534150613, |
| "grad_norm": 0.787230689946638, |
| "learning_rate": 1.252539000574246e-06, |
| "loss": 0.0839, |
| "step": 879 |
| }, |
| { |
| "epoch": 1.541155866900175, |
| "grad_norm": 0.7954726817869575, |
| "learning_rate": 1.2434473438230426e-06, |
| "loss": 0.0655, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.542907180385289, |
| "grad_norm": 0.8902609833747344, |
| "learning_rate": 1.2343841157550757e-06, |
| "loss": 0.0812, |
| "step": 881 |
| }, |
| { |
| "epoch": 1.5446584938704029, |
| "grad_norm": 0.8517459203382189, |
| "learning_rate": 1.2253493849586695e-06, |
| "loss": 0.091, |
| "step": 882 |
| }, |
| { |
| "epoch": 1.5464098073555166, |
| "grad_norm": 0.9150985106553249, |
| "learning_rate": 1.2163432198064834e-06, |
| "loss": 0.0957, |
| "step": 883 |
| }, |
| { |
| "epoch": 1.5481611208406305, |
| "grad_norm": 1.0872606970611478, |
| "learning_rate": 1.207365688454999e-06, |
| "loss": 0.0643, |
| "step": 884 |
| }, |
| { |
| "epoch": 1.5499124343257442, |
| "grad_norm": 1.0678678238356631, |
| "learning_rate": 1.1984168588440075e-06, |
| "loss": 0.089, |
| "step": 885 |
| }, |
| { |
| "epoch": 1.5516637478108581, |
| "grad_norm": 0.8484700364176362, |
| "learning_rate": 1.1894967986960877e-06, |
| "loss": 0.089, |
| "step": 886 |
| }, |
| { |
| "epoch": 1.553415061295972, |
| "grad_norm": 0.8940833430222472, |
| "learning_rate": 1.1806055755161029e-06, |
| "loss": 0.107, |
| "step": 887 |
| }, |
| { |
| "epoch": 1.5551663747810858, |
| "grad_norm": 0.8190347787792227, |
| "learning_rate": 1.1717432565906817e-06, |
| "loss": 0.0787, |
| "step": 888 |
| }, |
| { |
| "epoch": 1.5569176882661997, |
| "grad_norm": 0.71210611816795, |
| "learning_rate": 1.1629099089877116e-06, |
| "loss": 0.0665, |
| "step": 889 |
| }, |
| { |
| "epoch": 1.5586690017513134, |
| "grad_norm": 0.9042648925525666, |
| "learning_rate": 1.154105599555837e-06, |
| "loss": 0.0886, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.5604203152364273, |
| "grad_norm": 0.9505703326726503, |
| "learning_rate": 1.1453303949239431e-06, |
| "loss": 0.097, |
| "step": 891 |
| }, |
| { |
| "epoch": 1.5621716287215412, |
| "grad_norm": 1.3798512911645553, |
| "learning_rate": 1.1365843615006606e-06, |
| "loss": 0.0818, |
| "step": 892 |
| }, |
| { |
| "epoch": 1.563922942206655, |
| "grad_norm": 0.877599922223935, |
| "learning_rate": 1.127867565473858e-06, |
| "loss": 0.0697, |
| "step": 893 |
| }, |
| { |
| "epoch": 1.5656742556917689, |
| "grad_norm": 0.8465883171108056, |
| "learning_rate": 1.11918007281014e-06, |
| "loss": 0.0831, |
| "step": 894 |
| }, |
| { |
| "epoch": 1.5674255691768826, |
| "grad_norm": 0.9027728204073343, |
| "learning_rate": 1.1105219492543567e-06, |
| "loss": 0.1178, |
| "step": 895 |
| }, |
| { |
| "epoch": 1.5691768826619965, |
| "grad_norm": 0.976849713853511, |
| "learning_rate": 1.1018932603290927e-06, |
| "loss": 0.1209, |
| "step": 896 |
| }, |
| { |
| "epoch": 1.5709281961471104, |
| "grad_norm": 0.9584305854695117, |
| "learning_rate": 1.0932940713341843e-06, |
| "loss": 0.1158, |
| "step": 897 |
| }, |
| { |
| "epoch": 1.5726795096322241, |
| "grad_norm": 0.8686276598286167, |
| "learning_rate": 1.0847244473462165e-06, |
| "loss": 0.0715, |
| "step": 898 |
| }, |
| { |
| "epoch": 1.5744308231173378, |
| "grad_norm": 1.0091500961047988, |
| "learning_rate": 1.0761844532180322e-06, |
| "loss": 0.0961, |
| "step": 899 |
| }, |
| { |
| "epoch": 1.5761821366024518, |
| "grad_norm": 0.7807508240613471, |
| "learning_rate": 1.067674153578247e-06, |
| "loss": 0.0664, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.5779334500875657, |
| "grad_norm": 0.8067749796026943, |
| "learning_rate": 1.05919361283075e-06, |
| "loss": 0.0636, |
| "step": 901 |
| }, |
| { |
| "epoch": 1.5796847635726796, |
| "grad_norm": 0.8759620340871858, |
| "learning_rate": 1.0507428951542293e-06, |
| "loss": 0.0753, |
| "step": 902 |
| }, |
| { |
| "epoch": 1.5814360770577933, |
| "grad_norm": 0.975397189576844, |
| "learning_rate": 1.042322064501673e-06, |
| "loss": 0.0825, |
| "step": 903 |
| }, |
| { |
| "epoch": 1.583187390542907, |
| "grad_norm": 0.8598966333634731, |
| "learning_rate": 1.0339311845998929e-06, |
| "loss": 0.0713, |
| "step": 904 |
| }, |
| { |
| "epoch": 1.584938704028021, |
| "grad_norm": 0.7086427629579961, |
| "learning_rate": 1.025570318949044e-06, |
| "loss": 0.0588, |
| "step": 905 |
| }, |
| { |
| "epoch": 1.5866900175131349, |
| "grad_norm": 0.9371811065539839, |
| "learning_rate": 1.0172395308221355e-06, |
| "loss": 0.1025, |
| "step": 906 |
| }, |
| { |
| "epoch": 1.5884413309982488, |
| "grad_norm": 0.8523909056255194, |
| "learning_rate": 1.008938883264563e-06, |
| "loss": 0.0785, |
| "step": 907 |
| }, |
| { |
| "epoch": 1.5901926444833625, |
| "grad_norm": 0.6932575906112352, |
| "learning_rate": 1.0006684390936206e-06, |
| "loss": 0.0527, |
| "step": 908 |
| }, |
| { |
| "epoch": 1.5919439579684762, |
| "grad_norm": 0.7895131639078904, |
| "learning_rate": 9.924282608980318e-07, |
| "loss": 0.0672, |
| "step": 909 |
| }, |
| { |
| "epoch": 1.5936952714535901, |
| "grad_norm": 0.9109376997806669, |
| "learning_rate": 9.84218411037477e-07, |
| "loss": 0.0695, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.595446584938704, |
| "grad_norm": 1.375391449767245, |
| "learning_rate": 9.760389516421143e-07, |
| "loss": 0.1032, |
| "step": 911 |
| }, |
| { |
| "epoch": 1.597197898423818, |
| "grad_norm": 0.7645671144180791, |
| "learning_rate": 9.678899446121205e-07, |
| "loss": 0.0487, |
| "step": 912 |
| }, |
| { |
| "epoch": 1.5989492119089317, |
| "grad_norm": 0.98070657894628, |
| "learning_rate": 9.597714516172107e-07, |
| "loss": 0.1004, |
| "step": 913 |
| }, |
| { |
| "epoch": 1.6007005253940454, |
| "grad_norm": 0.8547769727295768, |
| "learning_rate": 9.516835340961783e-07, |
| "loss": 0.0743, |
| "step": 914 |
| }, |
| { |
| "epoch": 1.6024518388791593, |
| "grad_norm": 1.0122657120554246, |
| "learning_rate": 9.436262532564316e-07, |
| "loss": 0.1235, |
| "step": 915 |
| }, |
| { |
| "epoch": 1.6042031523642732, |
| "grad_norm": 1.0918909664473564, |
| "learning_rate": 9.355996700735242e-07, |
| "loss": 0.0997, |
| "step": 916 |
| }, |
| { |
| "epoch": 1.6059544658493872, |
| "grad_norm": 0.9405618536030825, |
| "learning_rate": 9.276038452907016e-07, |
| "loss": 0.0692, |
| "step": 917 |
| }, |
| { |
| "epoch": 1.6077057793345009, |
| "grad_norm": 0.9492717016651747, |
| "learning_rate": 9.19638839418433e-07, |
| "loss": 0.0828, |
| "step": 918 |
| }, |
| { |
| "epoch": 1.6094570928196146, |
| "grad_norm": 0.7689394884673381, |
| "learning_rate": 9.117047127339579e-07, |
| "loss": 0.0973, |
| "step": 919 |
| }, |
| { |
| "epoch": 1.6112084063047285, |
| "grad_norm": 0.8844949815830236, |
| "learning_rate": 9.038015252808335e-07, |
| "loss": 0.0863, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.6129597197898424, |
| "grad_norm": 0.7795777404725, |
| "learning_rate": 8.959293368684713e-07, |
| "loss": 0.0707, |
| "step": 921 |
| }, |
| { |
| "epoch": 1.6147110332749564, |
| "grad_norm": 0.9595778145894932, |
| "learning_rate": 8.880882070716945e-07, |
| "loss": 0.0936, |
| "step": 922 |
| }, |
| { |
| "epoch": 1.61646234676007, |
| "grad_norm": 0.9497307647401774, |
| "learning_rate": 8.80278195230278e-07, |
| "loss": 0.0941, |
| "step": 923 |
| }, |
| { |
| "epoch": 1.6182136602451838, |
| "grad_norm": 1.1659675647916, |
| "learning_rate": 8.724993604485044e-07, |
| "loss": 0.1023, |
| "step": 924 |
| }, |
| { |
| "epoch": 1.6199649737302977, |
| "grad_norm": 1.1234743594327323, |
| "learning_rate": 8.647517615947193e-07, |
| "loss": 0.0776, |
| "step": 925 |
| }, |
| { |
| "epoch": 1.6217162872154116, |
| "grad_norm": 0.8528927864364849, |
| "learning_rate": 8.57035457300876e-07, |
| "loss": 0.0807, |
| "step": 926 |
| }, |
| { |
| "epoch": 1.6234676007005255, |
| "grad_norm": 0.83886810565477, |
| "learning_rate": 8.49350505962106e-07, |
| "loss": 0.0594, |
| "step": 927 |
| }, |
| { |
| "epoch": 1.6252189141856392, |
| "grad_norm": 0.855463864117844, |
| "learning_rate": 8.416969657362622e-07, |
| "loss": 0.0819, |
| "step": 928 |
| }, |
| { |
| "epoch": 1.626970227670753, |
| "grad_norm": 0.908767552315424, |
| "learning_rate": 8.340748945434879e-07, |
| "loss": 0.1285, |
| "step": 929 |
| }, |
| { |
| "epoch": 1.6287215411558669, |
| "grad_norm": 1.0012453661601142, |
| "learning_rate": 8.264843500657799e-07, |
| "loss": 0.0861, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.6304728546409808, |
| "grad_norm": 1.0333069552134135, |
| "learning_rate": 8.189253897465433e-07, |
| "loss": 0.0753, |
| "step": 931 |
| }, |
| { |
| "epoch": 1.6322241681260947, |
| "grad_norm": 0.905280465006129, |
| "learning_rate": 8.113980707901653e-07, |
| "loss": 0.0899, |
| "step": 932 |
| }, |
| { |
| "epoch": 1.6339754816112084, |
| "grad_norm": 1.0338044714761787, |
| "learning_rate": 8.039024501615777e-07, |
| "loss": 0.0938, |
| "step": 933 |
| }, |
| { |
| "epoch": 1.6357267950963221, |
| "grad_norm": 2.359076466144793, |
| "learning_rate": 7.964385845858258e-07, |
| "loss": 0.1217, |
| "step": 934 |
| }, |
| { |
| "epoch": 1.637478108581436, |
| "grad_norm": 0.9826730256245049, |
| "learning_rate": 7.890065305476441e-07, |
| "loss": 0.1191, |
| "step": 935 |
| }, |
| { |
| "epoch": 1.63922942206655, |
| "grad_norm": 0.8747332183619412, |
| "learning_rate": 7.816063442910193e-07, |
| "loss": 0.0993, |
| "step": 936 |
| }, |
| { |
| "epoch": 1.640980735551664, |
| "grad_norm": 0.8075496453024368, |
| "learning_rate": 7.742380818187772e-07, |
| "loss": 0.071, |
| "step": 937 |
| }, |
| { |
| "epoch": 1.6427320490367776, |
| "grad_norm": 1.001355554288775, |
| "learning_rate": 7.669017988921474e-07, |
| "loss": 0.1207, |
| "step": 938 |
| }, |
| { |
| "epoch": 1.6444833625218913, |
| "grad_norm": 0.8066142991914813, |
| "learning_rate": 7.595975510303466e-07, |
| "loss": 0.0833, |
| "step": 939 |
| }, |
| { |
| "epoch": 1.6462346760070052, |
| "grad_norm": 0.845058894943196, |
| "learning_rate": 7.523253935101577e-07, |
| "loss": 0.0838, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.6479859894921192, |
| "grad_norm": 0.8892397239300569, |
| "learning_rate": 7.45085381365514e-07, |
| "loss": 0.0842, |
| "step": 941 |
| }, |
| { |
| "epoch": 1.649737302977233, |
| "grad_norm": 0.7913749401745233, |
| "learning_rate": 7.378775693870793e-07, |
| "loss": 0.0656, |
| "step": 942 |
| }, |
| { |
| "epoch": 1.6514886164623468, |
| "grad_norm": 0.9412135475034102, |
| "learning_rate": 7.307020121218333e-07, |
| "loss": 0.0988, |
| "step": 943 |
| }, |
| { |
| "epoch": 1.6532399299474605, |
| "grad_norm": 0.6922791542112726, |
| "learning_rate": 7.235587638726599e-07, |
| "loss": 0.0644, |
| "step": 944 |
| }, |
| { |
| "epoch": 1.6549912434325744, |
| "grad_norm": 0.683548226075825, |
| "learning_rate": 7.164478786979356e-07, |
| "loss": 0.0507, |
| "step": 945 |
| }, |
| { |
| "epoch": 1.6567425569176883, |
| "grad_norm": 0.9731267616312939, |
| "learning_rate": 7.093694104111237e-07, |
| "loss": 0.078, |
| "step": 946 |
| }, |
| { |
| "epoch": 1.658493870402802, |
| "grad_norm": 0.9269892309271431, |
| "learning_rate": 7.023234125803635e-07, |
| "loss": 0.1005, |
| "step": 947 |
| }, |
| { |
| "epoch": 1.660245183887916, |
| "grad_norm": 0.9430923254149106, |
| "learning_rate": 6.953099385280632e-07, |
| "loss": 0.063, |
| "step": 948 |
| }, |
| { |
| "epoch": 1.6619964973730297, |
| "grad_norm": 0.954580758978347, |
| "learning_rate": 6.883290413305011e-07, |
| "loss": 0.1154, |
| "step": 949 |
| }, |
| { |
| "epoch": 1.6637478108581436, |
| "grad_norm": 0.9178349147320257, |
| "learning_rate": 6.813807738174199e-07, |
| "loss": 0.0574, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.6654991243432575, |
| "grad_norm": 0.8477054090991074, |
| "learning_rate": 6.744651885716313e-07, |
| "loss": 0.0713, |
| "step": 951 |
| }, |
| { |
| "epoch": 1.6672504378283712, |
| "grad_norm": 1.033767975754156, |
| "learning_rate": 6.675823379286151e-07, |
| "loss": 0.1363, |
| "step": 952 |
| }, |
| { |
| "epoch": 1.6690017513134852, |
| "grad_norm": 0.8140614446219004, |
| "learning_rate": 6.607322739761219e-07, |
| "loss": 0.0811, |
| "step": 953 |
| }, |
| { |
| "epoch": 1.6707530647985989, |
| "grad_norm": 0.8580159228495404, |
| "learning_rate": 6.53915048553781e-07, |
| "loss": 0.0786, |
| "step": 954 |
| }, |
| { |
| "epoch": 1.6725043782837128, |
| "grad_norm": 1.0237229957514502, |
| "learning_rate": 6.471307132527071e-07, |
| "loss": 0.072, |
| "step": 955 |
| }, |
| { |
| "epoch": 1.6742556917688267, |
| "grad_norm": 1.1139363365853103, |
| "learning_rate": 6.40379319415112e-07, |
| "loss": 0.1586, |
| "step": 956 |
| }, |
| { |
| "epoch": 1.6760070052539404, |
| "grad_norm": 0.8039008743188236, |
| "learning_rate": 6.336609181339148e-07, |
| "loss": 0.074, |
| "step": 957 |
| }, |
| { |
| "epoch": 1.6777583187390543, |
| "grad_norm": 0.955504225000358, |
| "learning_rate": 6.269755602523531e-07, |
| "loss": 0.0941, |
| "step": 958 |
| }, |
| { |
| "epoch": 1.679509632224168, |
| "grad_norm": 1.1369679103050756, |
| "learning_rate": 6.203232963636003e-07, |
| "loss": 0.0953, |
| "step": 959 |
| }, |
| { |
| "epoch": 1.681260945709282, |
| "grad_norm": 0.8138093638660885, |
| "learning_rate": 6.137041768103819e-07, |
| "loss": 0.0682, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.683012259194396, |
| "grad_norm": 0.9438149839513782, |
| "learning_rate": 6.071182516845974e-07, |
| "loss": 0.0759, |
| "step": 961 |
| }, |
| { |
| "epoch": 1.6847635726795096, |
| "grad_norm": 1.1428154671493533, |
| "learning_rate": 6.005655708269386e-07, |
| "loss": 0.0851, |
| "step": 962 |
| }, |
| { |
| "epoch": 1.6865148861646233, |
| "grad_norm": 0.8193942407950339, |
| "learning_rate": 5.9404618382651e-07, |
| "loss": 0.0985, |
| "step": 963 |
| }, |
| { |
| "epoch": 1.6882661996497372, |
| "grad_norm": 1.054106752133417, |
| "learning_rate": 5.87560140020459e-07, |
| "loss": 0.0707, |
| "step": 964 |
| }, |
| { |
| "epoch": 1.6900175131348512, |
| "grad_norm": 0.8940576683459753, |
| "learning_rate": 5.811074884935964e-07, |
| "loss": 0.0683, |
| "step": 965 |
| }, |
| { |
| "epoch": 1.691768826619965, |
| "grad_norm": 0.6287784415256515, |
| "learning_rate": 5.746882780780322e-07, |
| "loss": 0.0589, |
| "step": 966 |
| }, |
| { |
| "epoch": 1.6935201401050788, |
| "grad_norm": 0.9750764230003164, |
| "learning_rate": 5.683025573528017e-07, |
| "loss": 0.1097, |
| "step": 967 |
| }, |
| { |
| "epoch": 1.6952714535901925, |
| "grad_norm": 0.9218101868960946, |
| "learning_rate": 5.619503746434956e-07, |
| "loss": 0.1168, |
| "step": 968 |
| }, |
| { |
| "epoch": 1.6970227670753064, |
| "grad_norm": 1.1755520131559736, |
| "learning_rate": 5.55631778021899e-07, |
| "loss": 0.0959, |
| "step": 969 |
| }, |
| { |
| "epoch": 1.6987740805604203, |
| "grad_norm": 0.8414667003154775, |
| "learning_rate": 5.493468153056236e-07, |
| "loss": 0.0664, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.7005253940455343, |
| "grad_norm": 0.94270536937066, |
| "learning_rate": 5.430955340577515e-07, |
| "loss": 0.0608, |
| "step": 971 |
| }, |
| { |
| "epoch": 1.702276707530648, |
| "grad_norm": 1.1274506094616077, |
| "learning_rate": 5.368779815864678e-07, |
| "loss": 0.1253, |
| "step": 972 |
| }, |
| { |
| "epoch": 1.7040280210157617, |
| "grad_norm": 1.0875056272885322, |
| "learning_rate": 5.306942049447095e-07, |
| "loss": 0.0803, |
| "step": 973 |
| }, |
| { |
| "epoch": 1.7057793345008756, |
| "grad_norm": 0.7586215304561557, |
| "learning_rate": 5.245442509298038e-07, |
| "loss": 0.0707, |
| "step": 974 |
| }, |
| { |
| "epoch": 1.7075306479859895, |
| "grad_norm": 0.9854236513125099, |
| "learning_rate": 5.184281660831158e-07, |
| "loss": 0.0862, |
| "step": 975 |
| }, |
| { |
| "epoch": 1.7092819614711035, |
| "grad_norm": 1.1000068183348335, |
| "learning_rate": 5.123459966897021e-07, |
| "loss": 0.0804, |
| "step": 976 |
| }, |
| { |
| "epoch": 1.7110332749562172, |
| "grad_norm": 0.9054286431304135, |
| "learning_rate": 5.062977887779486e-07, |
| "loss": 0.0605, |
| "step": 977 |
| }, |
| { |
| "epoch": 1.7127845884413309, |
| "grad_norm": 0.7861214593313163, |
| "learning_rate": 5.002835881192336e-07, |
| "loss": 0.0827, |
| "step": 978 |
| }, |
| { |
| "epoch": 1.7145359019264448, |
| "grad_norm": 0.8032715915593343, |
| "learning_rate": 4.943034402275754e-07, |
| "loss": 0.0983, |
| "step": 979 |
| }, |
| { |
| "epoch": 1.7162872154115587, |
| "grad_norm": 1.0469596242294776, |
| "learning_rate": 4.88357390359287e-07, |
| "loss": 0.0669, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.7180385288966726, |
| "grad_norm": 1.2227292003086079, |
| "learning_rate": 4.824454835126402e-07, |
| "loss": 0.1081, |
| "step": 981 |
| }, |
| { |
| "epoch": 1.7197898423817863, |
| "grad_norm": 0.8383307838231276, |
| "learning_rate": 4.765677644275163e-07, |
| "loss": 0.1177, |
| "step": 982 |
| }, |
| { |
| "epoch": 1.7215411558669, |
| "grad_norm": 0.8845101264170805, |
| "learning_rate": 4.707242775850751e-07, |
| "loss": 0.0825, |
| "step": 983 |
| }, |
| { |
| "epoch": 1.723292469352014, |
| "grad_norm": 0.8738347124266663, |
| "learning_rate": 4.6491506720741376e-07, |
| "loss": 0.0767, |
| "step": 984 |
| }, |
| { |
| "epoch": 1.725043782837128, |
| "grad_norm": 0.9235780941896881, |
| "learning_rate": 4.591401772572313e-07, |
| "loss": 0.1073, |
| "step": 985 |
| }, |
| { |
| "epoch": 1.7267950963222418, |
| "grad_norm": 0.808520171929526, |
| "learning_rate": 4.533996514375033e-07, |
| "loss": 0.0888, |
| "step": 986 |
| }, |
| { |
| "epoch": 1.7285464098073555, |
| "grad_norm": 0.8040022329220664, |
| "learning_rate": 4.476935331911397e-07, |
| "loss": 0.0689, |
| "step": 987 |
| }, |
| { |
| "epoch": 1.7302977232924692, |
| "grad_norm": 1.024560760379506, |
| "learning_rate": 4.4202186570066753e-07, |
| "loss": 0.0624, |
| "step": 988 |
| }, |
| { |
| "epoch": 1.7320490367775832, |
| "grad_norm": 0.7655215543483458, |
| "learning_rate": 4.363846918878961e-07, |
| "loss": 0.0641, |
| "step": 989 |
| }, |
| { |
| "epoch": 1.733800350262697, |
| "grad_norm": 0.8347354918909909, |
| "learning_rate": 4.307820544135938e-07, |
| "loss": 0.065, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.735551663747811, |
| "grad_norm": 1.0085283172237212, |
| "learning_rate": 4.2521399567717004e-07, |
| "loss": 0.0696, |
| "step": 991 |
| }, |
| { |
| "epoch": 1.7373029772329247, |
| "grad_norm": 0.9981732466737275, |
| "learning_rate": 4.1968055781634655e-07, |
| "loss": 0.0668, |
| "step": 992 |
| }, |
| { |
| "epoch": 1.7390542907180384, |
| "grad_norm": 0.6502067313094136, |
| "learning_rate": 4.1418178270684727e-07, |
| "loss": 0.067, |
| "step": 993 |
| }, |
| { |
| "epoch": 1.7408056042031523, |
| "grad_norm": 0.9620108166680914, |
| "learning_rate": 4.0871771196207223e-07, |
| "loss": 0.0865, |
| "step": 994 |
| }, |
| { |
| "epoch": 1.7425569176882663, |
| "grad_norm": 0.7915472204717325, |
| "learning_rate": 4.032883869327886e-07, |
| "loss": 0.0725, |
| "step": 995 |
| }, |
| { |
| "epoch": 1.7443082311733802, |
| "grad_norm": 1.11392133448476, |
| "learning_rate": 3.9789384870681904e-07, |
| "loss": 0.0976, |
| "step": 996 |
| }, |
| { |
| "epoch": 1.746059544658494, |
| "grad_norm": 0.8699184608686867, |
| "learning_rate": 3.925341381087239e-07, |
| "loss": 0.0631, |
| "step": 997 |
| }, |
| { |
| "epoch": 1.7478108581436076, |
| "grad_norm": 0.7718373090564703, |
| "learning_rate": 3.872092956995005e-07, |
| "loss": 0.0555, |
| "step": 998 |
| }, |
| { |
| "epoch": 1.7495621716287215, |
| "grad_norm": 1.2956166388511012, |
| "learning_rate": 3.81919361776269e-07, |
| "loss": 0.1143, |
| "step": 999 |
| }, |
| { |
| "epoch": 1.7513134851138354, |
| "grad_norm": 1.1331409176270781, |
| "learning_rate": 3.7666437637197127e-07, |
| "loss": 0.0937, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.7513134851138354, |
| "eval_loss": 0.20276139676570892, |
| "eval_runtime": 1.9019, |
| "eval_samples_per_second": 24.712, |
| "eval_steps_per_second": 6.31, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.7530647985989494, |
| "grad_norm": 0.7722864692389659, |
| "learning_rate": 3.714443792550687e-07, |
| "loss": 0.0783, |
| "step": 1001 |
| }, |
| { |
| "epoch": 1.754816112084063, |
| "grad_norm": 0.859006539210779, |
| "learning_rate": 3.6625940992923826e-07, |
| "loss": 0.0823, |
| "step": 1002 |
| }, |
| { |
| "epoch": 1.7565674255691768, |
| "grad_norm": 0.9250172746239839, |
| "learning_rate": 3.611095076330762e-07, |
| "loss": 0.1252, |
| "step": 1003 |
| }, |
| { |
| "epoch": 1.7583187390542907, |
| "grad_norm": 1.125672327130624, |
| "learning_rate": 3.559947113397988e-07, |
| "loss": 0.0956, |
| "step": 1004 |
| }, |
| { |
| "epoch": 1.7600700525394046, |
| "grad_norm": 1.0498908967100793, |
| "learning_rate": 3.509150597569483e-07, |
| "loss": 0.0776, |
| "step": 1005 |
| }, |
| { |
| "epoch": 1.7618213660245186, |
| "grad_norm": 0.9371798046914668, |
| "learning_rate": 3.458705913261029e-07, |
| "loss": 0.0605, |
| "step": 1006 |
| }, |
| { |
| "epoch": 1.7635726795096323, |
| "grad_norm": 0.8138873352682954, |
| "learning_rate": 3.4086134422257945e-07, |
| "loss": 0.0592, |
| "step": 1007 |
| }, |
| { |
| "epoch": 1.765323992994746, |
| "grad_norm": 0.9071537379118976, |
| "learning_rate": 3.3588735635515177e-07, |
| "loss": 0.1014, |
| "step": 1008 |
| }, |
| { |
| "epoch": 1.7670753064798599, |
| "grad_norm": 0.9437847210944791, |
| "learning_rate": 3.309486653657584e-07, |
| "loss": 0.1097, |
| "step": 1009 |
| }, |
| { |
| "epoch": 1.7688266199649738, |
| "grad_norm": 1.0469256700567728, |
| "learning_rate": 3.260453086292187e-07, |
| "loss": 0.0508, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.7705779334500875, |
| "grad_norm": 0.9028655810648165, |
| "learning_rate": 3.2117732325295416e-07, |
| "loss": 0.0708, |
| "step": 1011 |
| }, |
| { |
| "epoch": 1.7723292469352014, |
| "grad_norm": 1.0851027389495533, |
| "learning_rate": 3.163447460767005e-07, |
| "loss": 0.0761, |
| "step": 1012 |
| }, |
| { |
| "epoch": 1.7740805604203151, |
| "grad_norm": 0.9493103571999805, |
| "learning_rate": 3.115476136722362e-07, |
| "loss": 0.0996, |
| "step": 1013 |
| }, |
| { |
| "epoch": 1.775831873905429, |
| "grad_norm": 0.7586774662370825, |
| "learning_rate": 3.067859623431008e-07, |
| "loss": 0.0727, |
| "step": 1014 |
| }, |
| { |
| "epoch": 1.777583187390543, |
| "grad_norm": 0.9384004769032015, |
| "learning_rate": 3.0205982812431924e-07, |
| "loss": 0.0723, |
| "step": 1015 |
| }, |
| { |
| "epoch": 1.7793345008756567, |
| "grad_norm": 1.4973320222136197, |
| "learning_rate": 2.973692467821371e-07, |
| "loss": 0.125, |
| "step": 1016 |
| }, |
| { |
| "epoch": 1.7810858143607706, |
| "grad_norm": 0.8533671156495338, |
| "learning_rate": 2.927142538137384e-07, |
| "loss": 0.0596, |
| "step": 1017 |
| }, |
| { |
| "epoch": 1.7828371278458843, |
| "grad_norm": 0.7496043304937501, |
| "learning_rate": 2.880948844469872e-07, |
| "loss": 0.0788, |
| "step": 1018 |
| }, |
| { |
| "epoch": 1.7845884413309983, |
| "grad_norm": 1.3612555191011448, |
| "learning_rate": 2.8351117364015526e-07, |
| "loss": 0.0955, |
| "step": 1019 |
| }, |
| { |
| "epoch": 1.7863397548161122, |
| "grad_norm": 0.9630356160518716, |
| "learning_rate": 2.78963156081658e-07, |
| "loss": 0.0988, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.7880910683012259, |
| "grad_norm": 1.1121115936311063, |
| "learning_rate": 2.744508661897949e-07, |
| "loss": 0.1138, |
| "step": 1021 |
| }, |
| { |
| "epoch": 1.7898423817863398, |
| "grad_norm": 0.7339445298389599, |
| "learning_rate": 2.6997433811248475e-07, |
| "loss": 0.0927, |
| "step": 1022 |
| }, |
| { |
| "epoch": 1.7915936952714535, |
| "grad_norm": 0.8953178801892129, |
| "learning_rate": 2.6553360572701195e-07, |
| "loss": 0.1039, |
| "step": 1023 |
| }, |
| { |
| "epoch": 1.7933450087565674, |
| "grad_norm": 1.0398978963579788, |
| "learning_rate": 2.6112870263976686e-07, |
| "loss": 0.1398, |
| "step": 1024 |
| }, |
| { |
| "epoch": 1.7950963222416814, |
| "grad_norm": 1.182068666933184, |
| "learning_rate": 2.5675966218599136e-07, |
| "loss": 0.1103, |
| "step": 1025 |
| }, |
| { |
| "epoch": 1.796847635726795, |
| "grad_norm": 0.8892213526204024, |
| "learning_rate": 2.524265174295293e-07, |
| "loss": 0.0669, |
| "step": 1026 |
| }, |
| { |
| "epoch": 1.7985989492119088, |
| "grad_norm": 0.8059578982086425, |
| "learning_rate": 2.481293011625724e-07, |
| "loss": 0.0648, |
| "step": 1027 |
| }, |
| { |
| "epoch": 1.8003502626970227, |
| "grad_norm": 0.7102065798669428, |
| "learning_rate": 2.438680459054171e-07, |
| "loss": 0.0718, |
| "step": 1028 |
| }, |
| { |
| "epoch": 1.8021015761821366, |
| "grad_norm": 0.6852586354069934, |
| "learning_rate": 2.3964278390621374e-07, |
| "loss": 0.0826, |
| "step": 1029 |
| }, |
| { |
| "epoch": 1.8038528896672505, |
| "grad_norm": 0.6825735878520617, |
| "learning_rate": 2.3545354714072265e-07, |
| "loss": 0.0569, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.8056042031523643, |
| "grad_norm": 0.945132269971334, |
| "learning_rate": 2.3130036731207893e-07, |
| "loss": 0.1009, |
| "step": 1031 |
| }, |
| { |
| "epoch": 1.807355516637478, |
| "grad_norm": 0.7485096153539492, |
| "learning_rate": 2.2718327585054156e-07, |
| "loss": 0.0551, |
| "step": 1032 |
| }, |
| { |
| "epoch": 1.8091068301225919, |
| "grad_norm": 1.0031751267167337, |
| "learning_rate": 2.2310230391326682e-07, |
| "loss": 0.1056, |
| "step": 1033 |
| }, |
| { |
| "epoch": 1.8108581436077058, |
| "grad_norm": 0.7465291965071548, |
| "learning_rate": 2.190574823840641e-07, |
| "loss": 0.0799, |
| "step": 1034 |
| }, |
| { |
| "epoch": 1.8126094570928197, |
| "grad_norm": 1.0587014217350015, |
| "learning_rate": 2.15048841873165e-07, |
| "loss": 0.0851, |
| "step": 1035 |
| }, |
| { |
| "epoch": 1.8143607705779334, |
| "grad_norm": 1.1062177226269723, |
| "learning_rate": 2.110764127169923e-07, |
| "loss": 0.1016, |
| "step": 1036 |
| }, |
| { |
| "epoch": 1.8161120840630471, |
| "grad_norm": 0.8981661281109393, |
| "learning_rate": 2.0714022497793197e-07, |
| "loss": 0.0762, |
| "step": 1037 |
| }, |
| { |
| "epoch": 1.817863397548161, |
| "grad_norm": 0.9608421098019754, |
| "learning_rate": 2.0324030844410204e-07, |
| "loss": 0.1099, |
| "step": 1038 |
| }, |
| { |
| "epoch": 1.819614711033275, |
| "grad_norm": 0.9681573187969551, |
| "learning_rate": 1.993766926291285e-07, |
| "loss": 0.0735, |
| "step": 1039 |
| }, |
| { |
| "epoch": 1.821366024518389, |
| "grad_norm": 0.8273362165098569, |
| "learning_rate": 1.9554940677192213e-07, |
| "loss": 0.0981, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.8231173380035026, |
| "grad_norm": 0.8824152506254714, |
| "learning_rate": 1.9175847983645857e-07, |
| "loss": 0.1064, |
| "step": 1041 |
| }, |
| { |
| "epoch": 1.8248686514886163, |
| "grad_norm": 1.0042241694921472, |
| "learning_rate": 1.880039405115569e-07, |
| "loss": 0.0947, |
| "step": 1042 |
| }, |
| { |
| "epoch": 1.8266199649737302, |
| "grad_norm": 1.0580797717184558, |
| "learning_rate": 1.8428581721066486e-07, |
| "loss": 0.077, |
| "step": 1043 |
| }, |
| { |
| "epoch": 1.8283712784588442, |
| "grad_norm": 0.8676736423566411, |
| "learning_rate": 1.806041380716411e-07, |
| "loss": 0.0854, |
| "step": 1044 |
| }, |
| { |
| "epoch": 1.830122591943958, |
| "grad_norm": 1.0605831500963205, |
| "learning_rate": 1.769589309565445e-07, |
| "loss": 0.1308, |
| "step": 1045 |
| }, |
| { |
| "epoch": 1.8318739054290718, |
| "grad_norm": 1.14451694965405, |
| "learning_rate": 1.733502234514206e-07, |
| "loss": 0.0877, |
| "step": 1046 |
| }, |
| { |
| "epoch": 1.8336252189141855, |
| "grad_norm": 1.0185258661977419, |
| "learning_rate": 1.6977804286609777e-07, |
| "loss": 0.0884, |
| "step": 1047 |
| }, |
| { |
| "epoch": 1.8353765323992994, |
| "grad_norm": 0.9397169836007261, |
| "learning_rate": 1.6624241623397598e-07, |
| "loss": 0.1227, |
| "step": 1048 |
| }, |
| { |
| "epoch": 1.8371278458844134, |
| "grad_norm": 0.988120401495064, |
| "learning_rate": 1.6274337031182362e-07, |
| "loss": 0.0721, |
| "step": 1049 |
| }, |
| { |
| "epoch": 1.8388791593695273, |
| "grad_norm": 0.842379722481448, |
| "learning_rate": 1.5928093157957403e-07, |
| "loss": 0.0883, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.840630472854641, |
| "grad_norm": 0.7650106307160991, |
| "learning_rate": 1.5585512624012812e-07, |
| "loss": 0.0627, |
| "step": 1051 |
| }, |
| { |
| "epoch": 1.8423817863397547, |
| "grad_norm": 0.7520245228487481, |
| "learning_rate": 1.5246598021915304e-07, |
| "loss": 0.0583, |
| "step": 1052 |
| }, |
| { |
| "epoch": 1.8441330998248686, |
| "grad_norm": 0.7816067951505854, |
| "learning_rate": 1.4911351916488849e-07, |
| "loss": 0.0629, |
| "step": 1053 |
| }, |
| { |
| "epoch": 1.8458844133099825, |
| "grad_norm": 0.8206548278069278, |
| "learning_rate": 1.4579776844794834e-07, |
| "loss": 0.0629, |
| "step": 1054 |
| }, |
| { |
| "epoch": 1.8476357267950965, |
| "grad_norm": 0.8413065831699674, |
| "learning_rate": 1.4251875316113495e-07, |
| "loss": 0.0918, |
| "step": 1055 |
| }, |
| { |
| "epoch": 1.8493870402802102, |
| "grad_norm": 0.8646047693995625, |
| "learning_rate": 1.3927649811924182e-07, |
| "loss": 0.1067, |
| "step": 1056 |
| }, |
| { |
| "epoch": 1.8511383537653239, |
| "grad_norm": 1.1648039728609827, |
| "learning_rate": 1.3607102785887393e-07, |
| "loss": 0.1264, |
| "step": 1057 |
| }, |
| { |
| "epoch": 1.8528896672504378, |
| "grad_norm": 0.7719934600369042, |
| "learning_rate": 1.3290236663825562e-07, |
| "loss": 0.083, |
| "step": 1058 |
| }, |
| { |
| "epoch": 1.8546409807355517, |
| "grad_norm": 0.8950213917800804, |
| "learning_rate": 1.2977053843704957e-07, |
| "loss": 0.0847, |
| "step": 1059 |
| }, |
| { |
| "epoch": 1.8563922942206657, |
| "grad_norm": 1.0026565240342258, |
| "learning_rate": 1.2667556695617534e-07, |
| "loss": 0.1044, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.8581436077057794, |
| "grad_norm": 0.744107605196941, |
| "learning_rate": 1.236174756176295e-07, |
| "loss": 0.0721, |
| "step": 1061 |
| }, |
| { |
| "epoch": 1.859894921190893, |
| "grad_norm": 0.9509055299042167, |
| "learning_rate": 1.2059628756430797e-07, |
| "loss": 0.0818, |
| "step": 1062 |
| }, |
| { |
| "epoch": 1.861646234676007, |
| "grad_norm": 0.9112160743275268, |
| "learning_rate": 1.1761202565983399e-07, |
| "loss": 0.0645, |
| "step": 1063 |
| }, |
| { |
| "epoch": 1.863397548161121, |
| "grad_norm": 1.0272055127705473, |
| "learning_rate": 1.1466471248837985e-07, |
| "loss": 0.0748, |
| "step": 1064 |
| }, |
| { |
| "epoch": 1.8651488616462348, |
| "grad_norm": 0.8706834973807851, |
| "learning_rate": 1.1175437035450043e-07, |
| "loss": 0.0809, |
| "step": 1065 |
| }, |
| { |
| "epoch": 1.8669001751313485, |
| "grad_norm": 0.9563935615496911, |
| "learning_rate": 1.0888102128296052e-07, |
| "loss": 0.0581, |
| "step": 1066 |
| }, |
| { |
| "epoch": 1.8686514886164622, |
| "grad_norm": 0.5939777496692319, |
| "learning_rate": 1.0604468701857384e-07, |
| "loss": 0.0715, |
| "step": 1067 |
| }, |
| { |
| "epoch": 1.8704028021015762, |
| "grad_norm": 0.8243817906122596, |
| "learning_rate": 1.0324538902603154e-07, |
| "loss": 0.0795, |
| "step": 1068 |
| }, |
| { |
| "epoch": 1.87215411558669, |
| "grad_norm": 0.9622902905039842, |
| "learning_rate": 1.0048314848974616e-07, |
| "loss": 0.1115, |
| "step": 1069 |
| }, |
| { |
| "epoch": 1.873905429071804, |
| "grad_norm": 0.8979703410249232, |
| "learning_rate": 9.775798631368627e-08, |
| "loss": 0.0807, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.8756567425569177, |
| "grad_norm": 1.0203580216733263, |
| "learning_rate": 9.506992312122044e-08, |
| "loss": 0.1578, |
| "step": 1071 |
| }, |
| { |
| "epoch": 1.8774080560420314, |
| "grad_norm": 0.7923499980875016, |
| "learning_rate": 9.24189792549629e-08, |
| "loss": 0.0717, |
| "step": 1072 |
| }, |
| { |
| "epoch": 1.8791593695271454, |
| "grad_norm": 0.9641425590050212, |
| "learning_rate": 8.980517477661543e-08, |
| "loss": 0.0915, |
| "step": 1073 |
| }, |
| { |
| "epoch": 1.8809106830122593, |
| "grad_norm": 1.3669709921921882, |
| "learning_rate": 8.722852946682014e-08, |
| "loss": 0.1012, |
| "step": 1074 |
| }, |
| { |
| "epoch": 1.882661996497373, |
| "grad_norm": 0.9665788733716942, |
| "learning_rate": 8.468906282500577e-08, |
| "loss": 0.0614, |
| "step": 1075 |
| }, |
| { |
| "epoch": 1.884413309982487, |
| "grad_norm": 1.400962552491105, |
| "learning_rate": 8.218679406924279e-08, |
| "loss": 0.1118, |
| "step": 1076 |
| }, |
| { |
| "epoch": 1.8861646234676006, |
| "grad_norm": 0.9219968558833442, |
| "learning_rate": 7.972174213609684e-08, |
| "loss": 0.0779, |
| "step": 1077 |
| }, |
| { |
| "epoch": 1.8879159369527145, |
| "grad_norm": 0.8045432273483273, |
| "learning_rate": 7.7293925680485e-08, |
| "loss": 0.0767, |
| "step": 1078 |
| }, |
| { |
| "epoch": 1.8896672504378285, |
| "grad_norm": 0.9755957257233182, |
| "learning_rate": 7.490336307553691e-08, |
| "loss": 0.0859, |
| "step": 1079 |
| }, |
| { |
| "epoch": 1.8914185639229422, |
| "grad_norm": 1.0420255755068712, |
| "learning_rate": 7.255007241245227e-08, |
| "loss": 0.0811, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.893169877408056, |
| "grad_norm": 0.9551488849265928, |
| "learning_rate": 7.023407150036632e-08, |
| "loss": 0.1306, |
| "step": 1081 |
| }, |
| { |
| "epoch": 1.8949211908931698, |
| "grad_norm": 0.77813629526978, |
| "learning_rate": 6.795537786621564e-08, |
| "loss": 0.0741, |
| "step": 1082 |
| }, |
| { |
| "epoch": 1.8966725043782837, |
| "grad_norm": 0.8084153621002628, |
| "learning_rate": 6.571400875460154e-08, |
| "loss": 0.0814, |
| "step": 1083 |
| }, |
| { |
| "epoch": 1.8984238178633976, |
| "grad_norm": 0.8812494181028698, |
| "learning_rate": 6.350998112766626e-08, |
| "loss": 0.0897, |
| "step": 1084 |
| }, |
| { |
| "epoch": 1.9001751313485113, |
| "grad_norm": 0.7735066758206781, |
| "learning_rate": 6.1343311664957e-08, |
| "loss": 0.0895, |
| "step": 1085 |
| }, |
| { |
| "epoch": 1.9019264448336253, |
| "grad_norm": 0.8161894034164808, |
| "learning_rate": 5.92140167633054e-08, |
| "loss": 0.0899, |
| "step": 1086 |
| }, |
| { |
| "epoch": 1.903677758318739, |
| "grad_norm": 0.9086034533332163, |
| "learning_rate": 5.712211253670108e-08, |
| "loss": 0.0896, |
| "step": 1087 |
| }, |
| { |
| "epoch": 1.905429071803853, |
| "grad_norm": 0.75646864097532, |
| "learning_rate": 5.5067614816169955e-08, |
| "loss": 0.0766, |
| "step": 1088 |
| }, |
| { |
| "epoch": 1.9071803852889668, |
| "grad_norm": 0.731979768510242, |
| "learning_rate": 5.3050539149654964e-08, |
| "loss": 0.0686, |
| "step": 1089 |
| }, |
| { |
| "epoch": 1.9089316987740805, |
| "grad_norm": 0.8139155119262608, |
| "learning_rate": 5.107090080189725e-08, |
| "loss": 0.0801, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.9106830122591942, |
| "grad_norm": 0.9723091179497511, |
| "learning_rate": 4.9128714754321794e-08, |
| "loss": 0.0919, |
| "step": 1091 |
| }, |
| { |
| "epoch": 1.9124343257443082, |
| "grad_norm": 0.8354742100084726, |
| "learning_rate": 4.722399570492309e-08, |
| "loss": 0.0675, |
| "step": 1092 |
| }, |
| { |
| "epoch": 1.914185639229422, |
| "grad_norm": 0.8372591413758104, |
| "learning_rate": 4.535675806815576e-08, |
| "loss": 0.0645, |
| "step": 1093 |
| }, |
| { |
| "epoch": 1.915936952714536, |
| "grad_norm": 1.046373747378417, |
| "learning_rate": 4.352701597482245e-08, |
| "loss": 0.0907, |
| "step": 1094 |
| }, |
| { |
| "epoch": 1.9176882661996497, |
| "grad_norm": 0.8526193691562309, |
| "learning_rate": 4.173478327197e-08, |
| "loss": 0.084, |
| "step": 1095 |
| }, |
| { |
| "epoch": 1.9194395796847634, |
| "grad_norm": 0.8722546887931946, |
| "learning_rate": 3.998007352278233e-08, |
| "loss": 0.1041, |
| "step": 1096 |
| }, |
| { |
| "epoch": 1.9211908931698773, |
| "grad_norm": 0.9484274530627599, |
| "learning_rate": 3.826290000647881e-08, |
| "loss": 0.0926, |
| "step": 1097 |
| }, |
| { |
| "epoch": 1.9229422066549913, |
| "grad_norm": 1.064819089816097, |
| "learning_rate": 3.6583275718214406e-08, |
| "loss": 0.1145, |
| "step": 1098 |
| }, |
| { |
| "epoch": 1.9246935201401052, |
| "grad_norm": 0.6922227153177929, |
| "learning_rate": 3.4941213368980264e-08, |
| "loss": 0.0826, |
| "step": 1099 |
| }, |
| { |
| "epoch": 1.926444833625219, |
| "grad_norm": 0.9547055760475679, |
| "learning_rate": 3.333672538550714e-08, |
| "loss": 0.088, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.9281961471103326, |
| "grad_norm": 1.1501182412152315, |
| "learning_rate": 3.176982391017214e-08, |
| "loss": 0.1087, |
| "step": 1101 |
| }, |
| { |
| "epoch": 1.9299474605954465, |
| "grad_norm": 0.9179486185891369, |
| "learning_rate": 3.024052080090822e-08, |
| "loss": 0.0724, |
| "step": 1102 |
| }, |
| { |
| "epoch": 1.9316987740805605, |
| "grad_norm": 0.664000841194001, |
| "learning_rate": 2.874882763111153e-08, |
| "loss": 0.0474, |
| "step": 1103 |
| }, |
| { |
| "epoch": 1.9334500875656744, |
| "grad_norm": 0.7849197413612594, |
| "learning_rate": 2.7294755689555307e-08, |
| "loss": 0.0624, |
| "step": 1104 |
| }, |
| { |
| "epoch": 1.935201401050788, |
| "grad_norm": 0.768814756583036, |
| "learning_rate": 2.5878315980305548e-08, |
| "loss": 0.0741, |
| "step": 1105 |
| }, |
| { |
| "epoch": 1.9369527145359018, |
| "grad_norm": 1.003735722548012, |
| "learning_rate": 2.4499519222635493e-08, |
| "loss": 0.064, |
| "step": 1106 |
| }, |
| { |
| "epoch": 1.9387040280210157, |
| "grad_norm": 1.1491273999902563, |
| "learning_rate": 2.3158375850946268e-08, |
| "loss": 0.1346, |
| "step": 1107 |
| }, |
| { |
| "epoch": 1.9404553415061296, |
| "grad_norm": 0.7589932497855878, |
| "learning_rate": 2.1854896014686376e-08, |
| "loss": 0.0762, |
| "step": 1108 |
| }, |
| { |
| "epoch": 1.9422066549912436, |
| "grad_norm": 0.8331017479251108, |
| "learning_rate": 2.0589089578276767e-08, |
| "loss": 0.0484, |
| "step": 1109 |
| }, |
| { |
| "epoch": 1.9439579684763573, |
| "grad_norm": 0.9832005255060994, |
| "learning_rate": 1.936096612103533e-08, |
| "loss": 0.1037, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.945709281961471, |
| "grad_norm": 0.8225952913071677, |
| "learning_rate": 1.817053493710308e-08, |
| "loss": 0.0494, |
| "step": 1111 |
| }, |
| { |
| "epoch": 1.947460595446585, |
| "grad_norm": 0.7543806570940519, |
| "learning_rate": 1.7017805035375866e-08, |
| "loss": 0.0601, |
| "step": 1112 |
| }, |
| { |
| "epoch": 1.9492119089316988, |
| "grad_norm": 0.8293133630950005, |
| "learning_rate": 1.590278513943555e-08, |
| "loss": 0.0803, |
| "step": 1113 |
| }, |
| { |
| "epoch": 1.9509632224168127, |
| "grad_norm": 0.8768099650131057, |
| "learning_rate": 1.4825483687483377e-08, |
| "loss": 0.0659, |
| "step": 1114 |
| }, |
| { |
| "epoch": 1.9527145359019265, |
| "grad_norm": 0.9469149296349607, |
| "learning_rate": 1.3785908832275596e-08, |
| "loss": 0.0824, |
| "step": 1115 |
| }, |
| { |
| "epoch": 1.9544658493870402, |
| "grad_norm": 0.8351027145369079, |
| "learning_rate": 1.2784068441064611e-08, |
| "loss": 0.0678, |
| "step": 1116 |
| }, |
| { |
| "epoch": 1.956217162872154, |
| "grad_norm": 1.020423409119886, |
| "learning_rate": 1.1819970095536814e-08, |
| "loss": 0.1034, |
| "step": 1117 |
| }, |
| { |
| "epoch": 1.957968476357268, |
| "grad_norm": 0.787927571900196, |
| "learning_rate": 1.0893621091754847e-08, |
| "loss": 0.068, |
| "step": 1118 |
| }, |
| { |
| "epoch": 1.959719789842382, |
| "grad_norm": 0.6892271318399958, |
| "learning_rate": 1.0005028440104313e-08, |
| "loss": 0.0547, |
| "step": 1119 |
| }, |
| { |
| "epoch": 1.9614711033274956, |
| "grad_norm": 0.9205823435107259, |
| "learning_rate": 9.154198865239938e-09, |
| "loss": 0.1156, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.9632224168126093, |
| "grad_norm": 1.742536478730652, |
| "learning_rate": 8.341138806035043e-09, |
| "loss": 0.1909, |
| "step": 1121 |
| }, |
| { |
| "epoch": 1.9649737302977233, |
| "grad_norm": 1.0405677994948164, |
| "learning_rate": 7.565854415531037e-09, |
| "loss": 0.1204, |
| "step": 1122 |
| }, |
| { |
| "epoch": 1.9667250437828372, |
| "grad_norm": 0.9209713667024632, |
| "learning_rate": 6.8283515608924545e-09, |
| "loss": 0.0842, |
| "step": 1123 |
| }, |
| { |
| "epoch": 1.9684763572679511, |
| "grad_norm": 0.847022736110267, |
| "learning_rate": 6.128635823364204e-09, |
| "loss": 0.0647, |
| "step": 1124 |
| }, |
| { |
| "epoch": 1.9702276707530648, |
| "grad_norm": 0.8569688135314069, |
| "learning_rate": 5.466712498225501e-09, |
| "loss": 0.0984, |
| "step": 1125 |
| }, |
| { |
| "epoch": 1.9719789842381785, |
| "grad_norm": 0.7807927515616613, |
| "learning_rate": 4.8425865947515635e-09, |
| "loss": 0.0919, |
| "step": 1126 |
| }, |
| { |
| "epoch": 1.9737302977232924, |
| "grad_norm": 0.803314368964672, |
| "learning_rate": 4.256262836176972e-09, |
| "loss": 0.0748, |
| "step": 1127 |
| }, |
| { |
| "epoch": 1.9754816112084064, |
| "grad_norm": 0.8192070789421884, |
| "learning_rate": 3.7077456596584793e-09, |
| "loss": 0.0622, |
| "step": 1128 |
| }, |
| { |
| "epoch": 1.9772329246935203, |
| "grad_norm": 0.7402355614601918, |
| "learning_rate": 3.197039216241149e-09, |
| "loss": 0.0611, |
| "step": 1129 |
| }, |
| { |
| "epoch": 1.978984238178634, |
| "grad_norm": 0.8005803168162587, |
| "learning_rate": 2.7241473708283784e-09, |
| "loss": 0.082, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.9807355516637477, |
| "grad_norm": 0.9029956857496308, |
| "learning_rate": 2.2890737021513675e-09, |
| "loss": 0.099, |
| "step": 1131 |
| }, |
| { |
| "epoch": 1.9824868651488616, |
| "grad_norm": 0.786670113306005, |
| "learning_rate": 1.8918215027424746e-09, |
| "loss": 0.1038, |
| "step": 1132 |
| }, |
| { |
| "epoch": 1.9842381786339756, |
| "grad_norm": 0.8219187516038525, |
| "learning_rate": 1.532393778910235e-09, |
| "loss": 0.0785, |
| "step": 1133 |
| }, |
| { |
| "epoch": 1.9859894921190895, |
| "grad_norm": 0.7919831938955016, |
| "learning_rate": 1.2107932507177123e-09, |
| "loss": 0.0784, |
| "step": 1134 |
| }, |
| { |
| "epoch": 1.9877408056042032, |
| "grad_norm": 0.9217680061030283, |
| "learning_rate": 9.270223519586285e-10, |
| "loss": 0.086, |
| "step": 1135 |
| }, |
| { |
| "epoch": 1.989492119089317, |
| "grad_norm": 0.7642493509648998, |
| "learning_rate": 6.810832301440417e-10, |
| "loss": 0.0862, |
| "step": 1136 |
| }, |
| { |
| "epoch": 1.9912434325744308, |
| "grad_norm": 0.8154622682049354, |
| "learning_rate": 4.729777464806961e-10, |
| "loss": 0.0821, |
| "step": 1137 |
| }, |
| { |
| "epoch": 1.9929947460595447, |
| "grad_norm": 0.8127834975137689, |
| "learning_rate": 3.0270747586103045e-10, |
| "loss": 0.1092, |
| "step": 1138 |
| }, |
| { |
| "epoch": 1.9947460595446584, |
| "grad_norm": 1.0067550313335858, |
| "learning_rate": 1.702737068492999e-10, |
| "loss": 0.0985, |
| "step": 1139 |
| }, |
| { |
| "epoch": 1.9964973730297724, |
| "grad_norm": 0.9383181919963424, |
| "learning_rate": 7.567744167269464e-11, |
| "loss": 0.0958, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.998248686514886, |
| "grad_norm": 0.8519146037451801, |
| "learning_rate": 1.8919396212457865e-11, |
| "loss": 0.0717, |
| "step": 1141 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.6152399598695977, |
| "learning_rate": 0.0, |
| "loss": 0.0677, |
| "step": 1142 |
| }, |
| { |
| "epoch": 2.0, |
| "step": 1142, |
| "total_flos": 3773592600576.0, |
| "train_loss": 0.13601795624086924, |
| "train_runtime": 863.4724, |
| "train_samples_per_second": 10.581, |
| "train_steps_per_second": 1.323 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1142, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 50000000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3773592600576.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|