| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.9936034115138592, |
| "eval_steps": 500, |
| "global_step": 936, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0031982942430703624, |
| "grad_norm": 7.622738078733055, |
| "learning_rate": 1.0638297872340426e-07, |
| "loss": 1.1909, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.006396588486140725, |
| "grad_norm": 7.574176961529125, |
| "learning_rate": 2.1276595744680852e-07, |
| "loss": 1.1838, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.009594882729211088, |
| "grad_norm": 7.4636690419508005, |
| "learning_rate": 3.1914893617021275e-07, |
| "loss": 1.1971, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.01279317697228145, |
| "grad_norm": 7.513735532368614, |
| "learning_rate": 4.2553191489361704e-07, |
| "loss": 1.1795, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.015991471215351813, |
| "grad_norm": 7.333440889375827, |
| "learning_rate": 5.319148936170213e-07, |
| "loss": 1.1448, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.019189765458422176, |
| "grad_norm": 7.3639120181383575, |
| "learning_rate": 6.382978723404255e-07, |
| "loss": 1.1893, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.022388059701492536, |
| "grad_norm": 7.438753810381023, |
| "learning_rate": 7.446808510638298e-07, |
| "loss": 1.2042, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0255863539445629, |
| "grad_norm": 6.96598869339596, |
| "learning_rate": 8.510638297872341e-07, |
| "loss": 1.1527, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.028784648187633263, |
| "grad_norm": 6.606115410466868, |
| "learning_rate": 9.574468085106384e-07, |
| "loss": 1.1691, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.031982942430703626, |
| "grad_norm": 6.428618826670432, |
| "learning_rate": 1.0638297872340427e-06, |
| "loss": 1.1713, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.035181236673773986, |
| "grad_norm": 5.30277143517507, |
| "learning_rate": 1.170212765957447e-06, |
| "loss": 1.1162, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.03837953091684435, |
| "grad_norm": 5.216323252768823, |
| "learning_rate": 1.276595744680851e-06, |
| "loss": 1.1101, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.04157782515991471, |
| "grad_norm": 5.091458168528318, |
| "learning_rate": 1.3829787234042555e-06, |
| "loss": 1.1071, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.04477611940298507, |
| "grad_norm": 3.3709915569255746, |
| "learning_rate": 1.4893617021276596e-06, |
| "loss": 1.0666, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.04797441364605544, |
| "grad_norm": 3.3278073990556303, |
| "learning_rate": 1.595744680851064e-06, |
| "loss": 1.0571, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0511727078891258, |
| "grad_norm": 3.1457570812535685, |
| "learning_rate": 1.7021276595744682e-06, |
| "loss": 1.0419, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.054371002132196165, |
| "grad_norm": 3.0524886673021165, |
| "learning_rate": 1.8085106382978727e-06, |
| "loss": 1.0643, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.057569296375266525, |
| "grad_norm": 2.9940425825944055, |
| "learning_rate": 1.9148936170212767e-06, |
| "loss": 1.0184, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.060767590618336885, |
| "grad_norm": 3.353851239083325, |
| "learning_rate": 2.021276595744681e-06, |
| "loss": 1.0121, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.06396588486140725, |
| "grad_norm": 3.7889395105244073, |
| "learning_rate": 2.1276595744680853e-06, |
| "loss": 0.9889, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.06716417910447761, |
| "grad_norm": 3.8783603137474665, |
| "learning_rate": 2.2340425531914894e-06, |
| "loss": 0.996, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.07036247334754797, |
| "grad_norm": 3.5352669616023475, |
| "learning_rate": 2.340425531914894e-06, |
| "loss": 0.9742, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.07356076759061833, |
| "grad_norm": 3.2857152101090605, |
| "learning_rate": 2.446808510638298e-06, |
| "loss": 0.953, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.0767590618336887, |
| "grad_norm": 2.7570947843045857, |
| "learning_rate": 2.553191489361702e-06, |
| "loss": 0.9587, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.07995735607675906, |
| "grad_norm": 1.9278968128889937, |
| "learning_rate": 2.6595744680851065e-06, |
| "loss": 0.9329, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.08315565031982942, |
| "grad_norm": 1.78597612548795, |
| "learning_rate": 2.765957446808511e-06, |
| "loss": 0.9337, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.08635394456289978, |
| "grad_norm": 1.9761748056190767, |
| "learning_rate": 2.8723404255319155e-06, |
| "loss": 0.9235, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.08955223880597014, |
| "grad_norm": 1.9959330898097134, |
| "learning_rate": 2.978723404255319e-06, |
| "loss": 0.9095, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.09275053304904052, |
| "grad_norm": 1.6188409417305705, |
| "learning_rate": 3.0851063829787237e-06, |
| "loss": 0.8924, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.09594882729211088, |
| "grad_norm": 1.5343331514431935, |
| "learning_rate": 3.191489361702128e-06, |
| "loss": 0.8759, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.09914712153518124, |
| "grad_norm": 1.431667627920631, |
| "learning_rate": 3.297872340425532e-06, |
| "loss": 0.8694, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.1023454157782516, |
| "grad_norm": 1.2543200154352037, |
| "learning_rate": 3.4042553191489363e-06, |
| "loss": 0.8512, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.10554371002132196, |
| "grad_norm": 1.0539218969611874, |
| "learning_rate": 3.510638297872341e-06, |
| "loss": 0.8458, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.10874200426439233, |
| "grad_norm": 1.1170568972274193, |
| "learning_rate": 3.6170212765957453e-06, |
| "loss": 0.8899, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.11194029850746269, |
| "grad_norm": 1.2202744700302341, |
| "learning_rate": 3.723404255319149e-06, |
| "loss": 0.8393, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.11513859275053305, |
| "grad_norm": 1.0830349710322051, |
| "learning_rate": 3.8297872340425535e-06, |
| "loss": 0.871, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.11833688699360341, |
| "grad_norm": 0.9052734617810034, |
| "learning_rate": 3.936170212765958e-06, |
| "loss": 0.8202, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.12153518123667377, |
| "grad_norm": 0.8993984549295001, |
| "learning_rate": 4.042553191489362e-06, |
| "loss": 0.801, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.12473347547974413, |
| "grad_norm": 0.9725408899239824, |
| "learning_rate": 4.148936170212766e-06, |
| "loss": 0.8701, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.1279317697228145, |
| "grad_norm": 0.943110853985239, |
| "learning_rate": 4.255319148936171e-06, |
| "loss": 0.8114, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.13113006396588486, |
| "grad_norm": 0.9352658574729733, |
| "learning_rate": 4.361702127659575e-06, |
| "loss": 0.8401, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.13432835820895522, |
| "grad_norm": 1.0170142598852345, |
| "learning_rate": 4.468085106382979e-06, |
| "loss": 0.8198, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.13752665245202558, |
| "grad_norm": 0.9009283876561643, |
| "learning_rate": 4.574468085106383e-06, |
| "loss": 0.8055, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.14072494669509594, |
| "grad_norm": 0.8173324407768994, |
| "learning_rate": 4.680851063829788e-06, |
| "loss": 0.8042, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.1439232409381663, |
| "grad_norm": 0.8483547771193565, |
| "learning_rate": 4.787234042553192e-06, |
| "loss": 0.7773, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.14712153518123666, |
| "grad_norm": 0.7692184274159833, |
| "learning_rate": 4.893617021276596e-06, |
| "loss": 0.7997, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.15031982942430705, |
| "grad_norm": 0.81536863431857, |
| "learning_rate": 5e-06, |
| "loss": 0.7892, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.1535181236673774, |
| "grad_norm": 0.8158497316472542, |
| "learning_rate": 5.106382978723404e-06, |
| "loss": 0.7978, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.15671641791044777, |
| "grad_norm": 0.9305475572050518, |
| "learning_rate": 5.212765957446809e-06, |
| "loss": 0.8087, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.15991471215351813, |
| "grad_norm": 0.8236050831503633, |
| "learning_rate": 5.319148936170213e-06, |
| "loss": 0.7607, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.1631130063965885, |
| "grad_norm": 0.7846154878148738, |
| "learning_rate": 5.425531914893617e-06, |
| "loss": 0.8175, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.16631130063965885, |
| "grad_norm": 0.9528828885498349, |
| "learning_rate": 5.531914893617022e-06, |
| "loss": 0.791, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.1695095948827292, |
| "grad_norm": 0.7710572192032797, |
| "learning_rate": 5.638297872340426e-06, |
| "loss": 0.7986, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.17270788912579957, |
| "grad_norm": 0.8318885111022261, |
| "learning_rate": 5.744680851063831e-06, |
| "loss": 0.7732, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.17590618336886993, |
| "grad_norm": 0.8923328928381091, |
| "learning_rate": 5.851063829787235e-06, |
| "loss": 0.7506, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.1791044776119403, |
| "grad_norm": 0.8476738749734117, |
| "learning_rate": 5.957446808510638e-06, |
| "loss": 0.8041, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.18230277185501065, |
| "grad_norm": 0.6991963183642709, |
| "learning_rate": 6.063829787234044e-06, |
| "loss": 0.743, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.18550106609808104, |
| "grad_norm": 0.9527151297958727, |
| "learning_rate": 6.170212765957447e-06, |
| "loss": 0.7783, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.1886993603411514, |
| "grad_norm": 0.8889593346162057, |
| "learning_rate": 6.276595744680851e-06, |
| "loss": 0.7437, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.19189765458422176, |
| "grad_norm": 0.6699184888475535, |
| "learning_rate": 6.382978723404256e-06, |
| "loss": 0.7714, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.19509594882729211, |
| "grad_norm": 0.7705563645271767, |
| "learning_rate": 6.48936170212766e-06, |
| "loss": 0.7744, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.19829424307036247, |
| "grad_norm": 0.8167415683795367, |
| "learning_rate": 6.595744680851064e-06, |
| "loss": 0.7561, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.20149253731343283, |
| "grad_norm": 0.81962712226257, |
| "learning_rate": 6.702127659574469e-06, |
| "loss": 0.7891, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.2046908315565032, |
| "grad_norm": 0.6986640424503381, |
| "learning_rate": 6.808510638297873e-06, |
| "loss": 0.7572, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.20788912579957355, |
| "grad_norm": 0.756369209825303, |
| "learning_rate": 6.914893617021278e-06, |
| "loss": 0.7458, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.21108742004264391, |
| "grad_norm": 0.7270975619851376, |
| "learning_rate": 7.021276595744682e-06, |
| "loss": 0.7116, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.21428571428571427, |
| "grad_norm": 0.7077904574727415, |
| "learning_rate": 7.127659574468085e-06, |
| "loss": 0.741, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.21748400852878466, |
| "grad_norm": 0.7367201939644344, |
| "learning_rate": 7.234042553191491e-06, |
| "loss": 0.738, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.22068230277185502, |
| "grad_norm": 0.7461379677761271, |
| "learning_rate": 7.340425531914894e-06, |
| "loss": 0.7402, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.22388059701492538, |
| "grad_norm": 0.7635469236526181, |
| "learning_rate": 7.446808510638298e-06, |
| "loss": 0.7539, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.22707889125799574, |
| "grad_norm": 0.8662247901922688, |
| "learning_rate": 7.553191489361703e-06, |
| "loss": 0.7515, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.2302771855010661, |
| "grad_norm": 0.7957789030344851, |
| "learning_rate": 7.659574468085107e-06, |
| "loss": 0.7416, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.23347547974413646, |
| "grad_norm": 0.7423199681203461, |
| "learning_rate": 7.765957446808511e-06, |
| "loss": 0.7629, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.23667377398720682, |
| "grad_norm": 0.9173283664095695, |
| "learning_rate": 7.872340425531916e-06, |
| "loss": 0.7452, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.23987206823027718, |
| "grad_norm": 0.78368199600868, |
| "learning_rate": 7.97872340425532e-06, |
| "loss": 0.7268, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.24307036247334754, |
| "grad_norm": 0.8361765532940366, |
| "learning_rate": 8.085106382978723e-06, |
| "loss": 0.7584, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.2462686567164179, |
| "grad_norm": 0.809289414930847, |
| "learning_rate": 8.191489361702128e-06, |
| "loss": 0.7323, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.24946695095948826, |
| "grad_norm": 0.7351470133306395, |
| "learning_rate": 8.297872340425532e-06, |
| "loss": 0.7363, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.2526652452025586, |
| "grad_norm": 0.8056016102088669, |
| "learning_rate": 8.404255319148937e-06, |
| "loss": 0.723, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.255863539445629, |
| "grad_norm": 0.7345894297808727, |
| "learning_rate": 8.510638297872341e-06, |
| "loss": 0.769, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.25906183368869934, |
| "grad_norm": 0.7863593332470072, |
| "learning_rate": 8.617021276595746e-06, |
| "loss": 0.728, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.2622601279317697, |
| "grad_norm": 0.7627382259597176, |
| "learning_rate": 8.72340425531915e-06, |
| "loss": 0.7283, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.26545842217484006, |
| "grad_norm": 0.7824566533257207, |
| "learning_rate": 8.829787234042555e-06, |
| "loss": 0.7529, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.26865671641791045, |
| "grad_norm": 0.7231171750656687, |
| "learning_rate": 8.936170212765958e-06, |
| "loss": 0.7335, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.27185501066098083, |
| "grad_norm": 0.9121689099240826, |
| "learning_rate": 9.042553191489362e-06, |
| "loss": 0.756, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.27505330490405117, |
| "grad_norm": 0.7192041394994152, |
| "learning_rate": 9.148936170212767e-06, |
| "loss": 0.7008, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.27825159914712155, |
| "grad_norm": 0.926261732519985, |
| "learning_rate": 9.255319148936171e-06, |
| "loss": 0.7169, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.2814498933901919, |
| "grad_norm": 0.7106373516758131, |
| "learning_rate": 9.361702127659576e-06, |
| "loss": 0.7019, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.2846481876332623, |
| "grad_norm": 0.8650610615775703, |
| "learning_rate": 9.46808510638298e-06, |
| "loss": 0.7579, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.2878464818763326, |
| "grad_norm": 0.8537551564881841, |
| "learning_rate": 9.574468085106385e-06, |
| "loss": 0.7151, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.291044776119403, |
| "grad_norm": 0.9816090964885784, |
| "learning_rate": 9.680851063829787e-06, |
| "loss": 0.7361, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.2942430703624733, |
| "grad_norm": 0.7896344914068675, |
| "learning_rate": 9.787234042553192e-06, |
| "loss": 0.7241, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.2974413646055437, |
| "grad_norm": 0.8361537117215864, |
| "learning_rate": 9.893617021276596e-06, |
| "loss": 0.7063, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.3006396588486141, |
| "grad_norm": 0.8817821003662552, |
| "learning_rate": 1e-05, |
| "loss": 0.711, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.30383795309168443, |
| "grad_norm": 1.1654827546629623, |
| "learning_rate": 9.999965197129365e-06, |
| "loss": 0.7493, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.3070362473347548, |
| "grad_norm": 0.6905754899045732, |
| "learning_rate": 9.999860789001947e-06, |
| "loss": 0.7189, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.31023454157782515, |
| "grad_norm": 1.0193923266199096, |
| "learning_rate": 9.999686777071233e-06, |
| "loss": 0.748, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.31343283582089554, |
| "grad_norm": 0.8631527647207492, |
| "learning_rate": 9.999443163759669e-06, |
| "loss": 0.7297, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.31663113006396587, |
| "grad_norm": 0.7959272086157606, |
| "learning_rate": 9.999129952458628e-06, |
| "loss": 0.6892, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.31982942430703626, |
| "grad_norm": 1.019986780308216, |
| "learning_rate": 9.998747147528375e-06, |
| "loss": 0.6945, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.3230277185501066, |
| "grad_norm": 0.9796997640078874, |
| "learning_rate": 9.998294754297992e-06, |
| "loss": 0.7683, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.326226012793177, |
| "grad_norm": 0.9025864669755898, |
| "learning_rate": 9.997772779065312e-06, |
| "loss": 0.707, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.3294243070362473, |
| "grad_norm": 0.9406972010353718, |
| "learning_rate": 9.997181229096831e-06, |
| "loss": 0.7148, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.3326226012793177, |
| "grad_norm": 0.8270166326880105, |
| "learning_rate": 9.996520112627602e-06, |
| "loss": 0.7217, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.3358208955223881, |
| "grad_norm": 0.8843538366343328, |
| "learning_rate": 9.995789438861128e-06, |
| "loss": 0.7195, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.3390191897654584, |
| "grad_norm": 0.8701458072422218, |
| "learning_rate": 9.994989217969224e-06, |
| "loss": 0.7175, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.3422174840085288, |
| "grad_norm": 0.7418076755999989, |
| "learning_rate": 9.994119461091885e-06, |
| "loss": 0.7158, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.34541577825159914, |
| "grad_norm": 0.8703976870763576, |
| "learning_rate": 9.993180180337126e-06, |
| "loss": 0.6813, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.3486140724946695, |
| "grad_norm": 0.9178054118198938, |
| "learning_rate": 9.992171388780814e-06, |
| "loss": 0.712, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.35181236673773986, |
| "grad_norm": 0.8589547671449834, |
| "learning_rate": 9.991093100466482e-06, |
| "loss": 0.7499, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.35501066098081024, |
| "grad_norm": 0.8726951716749137, |
| "learning_rate": 9.989945330405146e-06, |
| "loss": 0.6837, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.3582089552238806, |
| "grad_norm": 0.927713037073185, |
| "learning_rate": 9.988728094575082e-06, |
| "loss": 0.7342, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.36140724946695096, |
| "grad_norm": 0.7505991388636275, |
| "learning_rate": 9.98744140992161e-06, |
| "loss": 0.7094, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.3646055437100213, |
| "grad_norm": 0.894268442242807, |
| "learning_rate": 9.986085294356858e-06, |
| "loss": 0.6777, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.3678038379530917, |
| "grad_norm": 0.8024161706862449, |
| "learning_rate": 9.98465976675951e-06, |
| "loss": 0.7261, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.37100213219616207, |
| "grad_norm": 0.9360632386011705, |
| "learning_rate": 9.983164846974549e-06, |
| "loss": 0.7111, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.3742004264392324, |
| "grad_norm": 0.850822368769409, |
| "learning_rate": 9.981600555812975e-06, |
| "loss": 0.6618, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.3773987206823028, |
| "grad_norm": 0.8087687979800354, |
| "learning_rate": 9.979966915051517e-06, |
| "loss": 0.7151, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.3805970149253731, |
| "grad_norm": 1.019238061277482, |
| "learning_rate": 9.978263947432331e-06, |
| "loss": 0.7419, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.3837953091684435, |
| "grad_norm": 0.6679781593773177, |
| "learning_rate": 9.976491676662679e-06, |
| "loss": 0.6984, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.38699360341151384, |
| "grad_norm": 0.8025666983281575, |
| "learning_rate": 9.974650127414609e-06, |
| "loss": 0.7041, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.39019189765458423, |
| "grad_norm": 0.6935416167824833, |
| "learning_rate": 9.972739325324596e-06, |
| "loss": 0.6972, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.39339019189765456, |
| "grad_norm": 0.8012590306354953, |
| "learning_rate": 9.970759296993205e-06, |
| "loss": 0.6719, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.39658848614072495, |
| "grad_norm": 0.7351333900689639, |
| "learning_rate": 9.968710069984699e-06, |
| "loss": 0.6897, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.3997867803837953, |
| "grad_norm": 0.8515682416300615, |
| "learning_rate": 9.966591672826674e-06, |
| "loss": 0.6911, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.40298507462686567, |
| "grad_norm": 0.7745318085363246, |
| "learning_rate": 9.964404135009649e-06, |
| "loss": 0.6768, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.40618336886993606, |
| "grad_norm": 0.7565019041685611, |
| "learning_rate": 9.962147486986664e-06, |
| "loss": 0.7018, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.4093816631130064, |
| "grad_norm": 0.675609674290225, |
| "learning_rate": 9.959821760172849e-06, |
| "loss": 0.6978, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.4125799573560768, |
| "grad_norm": 0.8186758848215644, |
| "learning_rate": 9.957426986944994e-06, |
| "loss": 0.6931, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.4157782515991471, |
| "grad_norm": 0.7207234783292092, |
| "learning_rate": 9.95496320064109e-06, |
| "loss": 0.7082, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.4189765458422175, |
| "grad_norm": 0.7354378059056297, |
| "learning_rate": 9.952430435559873e-06, |
| "loss": 0.7363, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.42217484008528783, |
| "grad_norm": 0.8258670771003666, |
| "learning_rate": 9.94982872696034e-06, |
| "loss": 0.71, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.4253731343283582, |
| "grad_norm": 0.7436932431094343, |
| "learning_rate": 9.947158111061263e-06, |
| "loss": 0.7276, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.42857142857142855, |
| "grad_norm": 0.6981052362583808, |
| "learning_rate": 9.94441862504068e-06, |
| "loss": 0.6774, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.43176972281449894, |
| "grad_norm": 0.8840543705470293, |
| "learning_rate": 9.941610307035385e-06, |
| "loss": 0.7257, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.4349680170575693, |
| "grad_norm": 0.6694638600717385, |
| "learning_rate": 9.938733196140386e-06, |
| "loss": 0.7029, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.43816631130063965, |
| "grad_norm": 0.8441420481093084, |
| "learning_rate": 9.935787332408375e-06, |
| "loss": 0.7274, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.44136460554371004, |
| "grad_norm": 0.8505776286992377, |
| "learning_rate": 9.932772756849152e-06, |
| "loss": 0.6935, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.4445628997867804, |
| "grad_norm": 0.9226301253646606, |
| "learning_rate": 9.929689511429075e-06, |
| "loss": 0.7264, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.44776119402985076, |
| "grad_norm": 0.7579619597097683, |
| "learning_rate": 9.926537639070457e-06, |
| "loss": 0.7076, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.4509594882729211, |
| "grad_norm": 0.8076488456889905, |
| "learning_rate": 9.923317183650985e-06, |
| "loss": 0.7003, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.4541577825159915, |
| "grad_norm": 0.8485727454100752, |
| "learning_rate": 9.92002819000309e-06, |
| "loss": 0.7211, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.4573560767590618, |
| "grad_norm": 0.751313097685783, |
| "learning_rate": 9.916670703913345e-06, |
| "loss": 0.6859, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.4605543710021322, |
| "grad_norm": 0.7996244442440978, |
| "learning_rate": 9.913244772121811e-06, |
| "loss": 0.7048, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.46375266524520253, |
| "grad_norm": 0.7586742602393676, |
| "learning_rate": 9.90975044232139e-06, |
| "loss": 0.7232, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.4669509594882729, |
| "grad_norm": 0.7333771413773464, |
| "learning_rate": 9.90618776315717e-06, |
| "loss": 0.6973, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.4701492537313433, |
| "grad_norm": 0.7462538908947492, |
| "learning_rate": 9.902556784225729e-06, |
| "loss": 0.6956, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.47334754797441364, |
| "grad_norm": 0.7953075557133956, |
| "learning_rate": 9.898857556074469e-06, |
| "loss": 0.7225, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.47654584221748403, |
| "grad_norm": 0.7060064737270609, |
| "learning_rate": 9.895090130200889e-06, |
| "loss": 0.6814, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.47974413646055436, |
| "grad_norm": 0.8120142107026623, |
| "learning_rate": 9.891254559051886e-06, |
| "loss": 0.6731, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.48294243070362475, |
| "grad_norm": 0.9453211383140191, |
| "learning_rate": 9.887350896023015e-06, |
| "loss": 0.6725, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.4861407249466951, |
| "grad_norm": 0.753978978180176, |
| "learning_rate": 9.883379195457747e-06, |
| "loss": 0.6931, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.48933901918976547, |
| "grad_norm": 0.9761958786101086, |
| "learning_rate": 9.879339512646714e-06, |
| "loss": 0.6964, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.4925373134328358, |
| "grad_norm": 0.8092682567275233, |
| "learning_rate": 9.875231903826936e-06, |
| "loss": 0.7033, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.4957356076759062, |
| "grad_norm": 0.8755474532977587, |
| "learning_rate": 9.871056426181052e-06, |
| "loss": 0.697, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.4989339019189765, |
| "grad_norm": 0.7311549338301705, |
| "learning_rate": 9.8668131378365e-06, |
| "loss": 0.6999, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.502132196162047, |
| "grad_norm": 0.7037792105922891, |
| "learning_rate": 9.862502097864726e-06, |
| "loss": 0.6792, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.5053304904051172, |
| "grad_norm": 0.7713310350403229, |
| "learning_rate": 9.858123366280358e-06, |
| "loss": 0.7028, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.5085287846481876, |
| "grad_norm": 0.7608743370139368, |
| "learning_rate": 9.853677004040368e-06, |
| "loss": 0.7262, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.511727078891258, |
| "grad_norm": 0.7523293064381485, |
| "learning_rate": 9.849163073043223e-06, |
| "loss": 0.6968, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.5149253731343284, |
| "grad_norm": 0.7701188802305937, |
| "learning_rate": 9.844581636128025e-06, |
| "loss": 0.6878, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.5181236673773987, |
| "grad_norm": 0.6850118378660099, |
| "learning_rate": 9.83993275707364e-06, |
| "loss": 0.6843, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.5213219616204691, |
| "grad_norm": 0.7633903463897005, |
| "learning_rate": 9.835216500597797e-06, |
| "loss": 0.6625, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.5245202558635395, |
| "grad_norm": 0.7032490531444039, |
| "learning_rate": 9.830432932356207e-06, |
| "loss": 0.6787, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.5277185501066098, |
| "grad_norm": 0.8377452657391034, |
| "learning_rate": 9.82558211894163e-06, |
| "loss": 0.6485, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.5309168443496801, |
| "grad_norm": 0.6618917554232225, |
| "learning_rate": 9.820664127882958e-06, |
| "loss": 0.6789, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.5341151385927505, |
| "grad_norm": 0.8178971073023995, |
| "learning_rate": 9.815679027644273e-06, |
| "loss": 0.7136, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.5373134328358209, |
| "grad_norm": 0.7226926940415138, |
| "learning_rate": 9.8106268876239e-06, |
| "loss": 0.6973, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.5405117270788913, |
| "grad_norm": 0.8219860052512269, |
| "learning_rate": 9.805507778153423e-06, |
| "loss": 0.6833, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.5437100213219617, |
| "grad_norm": 0.8090438198755149, |
| "learning_rate": 9.800321770496726e-06, |
| "loss": 0.6683, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.5469083155650319, |
| "grad_norm": 0.8076219868171068, |
| "learning_rate": 9.79506893684899e-06, |
| "loss": 0.6742, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.5501066098081023, |
| "grad_norm": 0.7650394887530159, |
| "learning_rate": 9.789749350335693e-06, |
| "loss": 0.6809, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.5533049040511727, |
| "grad_norm": 0.6909569362578575, |
| "learning_rate": 9.784363085011587e-06, |
| "loss": 0.6987, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.5565031982942431, |
| "grad_norm": 0.7544714952212014, |
| "learning_rate": 9.778910215859666e-06, |
| "loss": 0.7286, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.5597014925373134, |
| "grad_norm": 0.6910806053162777, |
| "learning_rate": 9.773390818790136e-06, |
| "loss": 0.7004, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.5628997867803838, |
| "grad_norm": 0.728635086997373, |
| "learning_rate": 9.767804970639338e-06, |
| "loss": 0.7107, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.5660980810234542, |
| "grad_norm": 0.7164830915823774, |
| "learning_rate": 9.762152749168693e-06, |
| "loss": 0.6659, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.5692963752665245, |
| "grad_norm": 0.7185222557170264, |
| "learning_rate": 9.756434233063616e-06, |
| "loss": 0.7204, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.5724946695095949, |
| "grad_norm": 0.6376198071084583, |
| "learning_rate": 9.750649501932414e-06, |
| "loss": 0.6851, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.5756929637526652, |
| "grad_norm": 0.7372459805811605, |
| "learning_rate": 9.744798636305189e-06, |
| "loss": 0.6655, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.5788912579957356, |
| "grad_norm": 0.7192794699200221, |
| "learning_rate": 9.738881717632709e-06, |
| "loss": 0.6981, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.582089552238806, |
| "grad_norm": 0.8372267929782506, |
| "learning_rate": 9.732898828285273e-06, |
| "loss": 0.6609, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.5852878464818764, |
| "grad_norm": 0.6890860653831234, |
| "learning_rate": 9.726850051551575e-06, |
| "loss": 0.6849, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.5884861407249466, |
| "grad_norm": 1.0979021153485466, |
| "learning_rate": 9.72073547163753e-06, |
| "loss": 0.7055, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.591684434968017, |
| "grad_norm": 0.718530118293507, |
| "learning_rate": 9.714555173665112e-06, |
| "loss": 0.6793, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.5948827292110874, |
| "grad_norm": 0.9568746124941592, |
| "learning_rate": 9.708309243671167e-06, |
| "loss": 0.6805, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.5980810234541578, |
| "grad_norm": 0.9190645493855029, |
| "learning_rate": 9.701997768606209e-06, |
| "loss": 0.6578, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.6012793176972282, |
| "grad_norm": 0.8452449705256939, |
| "learning_rate": 9.695620836333219e-06, |
| "loss": 0.6752, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.6044776119402985, |
| "grad_norm": 0.912469389514112, |
| "learning_rate": 9.68917853562642e-06, |
| "loss": 0.677, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.6076759061833689, |
| "grad_norm": 0.8380041206744561, |
| "learning_rate": 9.68267095617003e-06, |
| "loss": 0.6985, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.6108742004264393, |
| "grad_norm": 0.863868290234638, |
| "learning_rate": 9.676098188557032e-06, |
| "loss": 0.7313, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.6140724946695096, |
| "grad_norm": 0.8174466263379166, |
| "learning_rate": 9.669460324287899e-06, |
| "loss": 0.7053, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.6172707889125799, |
| "grad_norm": 0.8909312106674119, |
| "learning_rate": 9.662757455769317e-06, |
| "loss": 0.682, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.6204690831556503, |
| "grad_norm": 0.7302375395625422, |
| "learning_rate": 9.655989676312918e-06, |
| "loss": 0.6594, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.6236673773987207, |
| "grad_norm": 0.8735527453330153, |
| "learning_rate": 9.649157080133962e-06, |
| "loss": 0.6674, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.6268656716417911, |
| "grad_norm": 0.7064746459902383, |
| "learning_rate": 9.642259762350034e-06, |
| "loss": 0.7017, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.6300639658848614, |
| "grad_norm": 0.8655024689230618, |
| "learning_rate": 9.635297818979715e-06, |
| "loss": 0.6661, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.6332622601279317, |
| "grad_norm": 0.728341775629865, |
| "learning_rate": 9.628271346941252e-06, |
| "loss": 0.6925, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.6364605543710021, |
| "grad_norm": 0.6968228679265215, |
| "learning_rate": 9.621180444051206e-06, |
| "loss": 0.685, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.6396588486140725, |
| "grad_norm": 0.8921095235064438, |
| "learning_rate": 9.614025209023084e-06, |
| "loss": 0.7016, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.6428571428571429, |
| "grad_norm": 0.7770028881285537, |
| "learning_rate": 9.606805741465977e-06, |
| "loss": 0.7077, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.6460554371002132, |
| "grad_norm": 0.7270295337970796, |
| "learning_rate": 9.59952214188316e-06, |
| "loss": 0.6779, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.6492537313432836, |
| "grad_norm": 0.8302887272339062, |
| "learning_rate": 9.592174511670704e-06, |
| "loss": 0.6722, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.652452025586354, |
| "grad_norm": 0.8688719467779429, |
| "learning_rate": 9.58476295311606e-06, |
| "loss": 0.7104, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.6556503198294243, |
| "grad_norm": 0.6323690135784534, |
| "learning_rate": 9.577287569396632e-06, |
| "loss": 0.6927, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.6588486140724946, |
| "grad_norm": 0.8941706514509578, |
| "learning_rate": 9.569748464578343e-06, |
| "loss": 0.6926, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.662046908315565, |
| "grad_norm": 0.7211897394095068, |
| "learning_rate": 9.562145743614193e-06, |
| "loss": 0.7006, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.6652452025586354, |
| "grad_norm": 0.655783741036758, |
| "learning_rate": 9.554479512342785e-06, |
| "loss": 0.7108, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.6684434968017058, |
| "grad_norm": 0.7467548640108442, |
| "learning_rate": 9.54674987748686e-06, |
| "loss": 0.6649, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.6716417910447762, |
| "grad_norm": 0.7171171346221873, |
| "learning_rate": 9.538956946651816e-06, |
| "loss": 0.6531, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.6748400852878464, |
| "grad_norm": 0.7120324790582493, |
| "learning_rate": 9.531100828324191e-06, |
| "loss": 0.6795, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.6780383795309168, |
| "grad_norm": 0.675292328763824, |
| "learning_rate": 9.52318163187018e-06, |
| "loss": 0.6883, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.6812366737739872, |
| "grad_norm": 0.9706242893642346, |
| "learning_rate": 9.515199467534086e-06, |
| "loss": 0.7105, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.6844349680170576, |
| "grad_norm": 0.6339606603859417, |
| "learning_rate": 9.507154446436806e-06, |
| "loss": 0.7006, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.6876332622601279, |
| "grad_norm": 0.8270660302761437, |
| "learning_rate": 9.499046680574267e-06, |
| "loss": 0.6937, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.6908315565031983, |
| "grad_norm": 0.8339411710564606, |
| "learning_rate": 9.490876282815884e-06, |
| "loss": 0.7089, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.6940298507462687, |
| "grad_norm": 0.7095006207653165, |
| "learning_rate": 9.482643366902972e-06, |
| "loss": 0.6774, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.697228144989339, |
| "grad_norm": 0.8701992961050132, |
| "learning_rate": 9.474348047447177e-06, |
| "loss": 0.6987, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.7004264392324094, |
| "grad_norm": 0.8555157892062962, |
| "learning_rate": 9.465990439928868e-06, |
| "loss": 0.6888, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.7036247334754797, |
| "grad_norm": 0.7482188301136834, |
| "learning_rate": 9.457570660695542e-06, |
| "loss": 0.6924, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.7068230277185501, |
| "grad_norm": 0.6679769126751529, |
| "learning_rate": 9.449088826960187e-06, |
| "loss": 0.6814, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.7100213219616205, |
| "grad_norm": 0.6481228296820869, |
| "learning_rate": 9.440545056799677e-06, |
| "loss": 0.6927, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.7132196162046909, |
| "grad_norm": 0.6138493570700504, |
| "learning_rate": 9.431939469153096e-06, |
| "loss": 0.6848, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.7164179104477612, |
| "grad_norm": 0.6857392440178816, |
| "learning_rate": 9.423272183820109e-06, |
| "loss": 0.7016, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.7196162046908315, |
| "grad_norm": 0.6565822633668216, |
| "learning_rate": 9.41454332145928e-06, |
| "loss": 0.684, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.7228144989339019, |
| "grad_norm": 0.7686352189035123, |
| "learning_rate": 9.405753003586396e-06, |
| "loss": 0.662, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.7260127931769723, |
| "grad_norm": 0.6889440928778451, |
| "learning_rate": 9.396901352572771e-06, |
| "loss": 0.6757, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.7292110874200426, |
| "grad_norm": 0.6644883797889718, |
| "learning_rate": 9.387988491643558e-06, |
| "loss": 0.69, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.732409381663113, |
| "grad_norm": 0.7626373390965248, |
| "learning_rate": 9.379014544876011e-06, |
| "loss": 0.7187, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.7356076759061834, |
| "grad_norm": 0.704507663933334, |
| "learning_rate": 9.369979637197774e-06, |
| "loss": 0.6585, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.7388059701492538, |
| "grad_norm": 0.7336157000029004, |
| "learning_rate": 9.360883894385137e-06, |
| "loss": 0.7127, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.7420042643923241, |
| "grad_norm": 0.6546636167664962, |
| "learning_rate": 9.351727443061284e-06, |
| "loss": 0.704, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.7452025586353944, |
| "grad_norm": 0.7043313515356551, |
| "learning_rate": 9.342510410694529e-06, |
| "loss": 0.6852, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.7484008528784648, |
| "grad_norm": 0.9011726584289667, |
| "learning_rate": 9.33323292559655e-06, |
| "loss": 0.7081, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.7515991471215352, |
| "grad_norm": 0.7292470054254178, |
| "learning_rate": 9.323895116920591e-06, |
| "loss": 0.669, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.7547974413646056, |
| "grad_norm": 0.8382635859763398, |
| "learning_rate": 9.31449711465967e-06, |
| "loss": 0.6763, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.7579957356076759, |
| "grad_norm": 0.7483080444786336, |
| "learning_rate": 9.305039049644772e-06, |
| "loss": 0.6567, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.7611940298507462, |
| "grad_norm": 0.7261935584991153, |
| "learning_rate": 9.29552105354302e-06, |
| "loss": 0.6714, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.7643923240938166, |
| "grad_norm": 0.8645856822885789, |
| "learning_rate": 9.28594325885585e-06, |
| "loss": 0.6625, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.767590618336887, |
| "grad_norm": 0.7568040003927768, |
| "learning_rate": 9.27630579891716e-06, |
| "loss": 0.6191, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.7707889125799574, |
| "grad_norm": 0.7959983248684005, |
| "learning_rate": 9.266608807891459e-06, |
| "loss": 0.72, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.7739872068230277, |
| "grad_norm": 0.6158464477516697, |
| "learning_rate": 9.256852420771999e-06, |
| "loss": 0.6984, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.7771855010660981, |
| "grad_norm": 0.6475971949577826, |
| "learning_rate": 9.24703677337889e-06, |
| "loss": 0.6707, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.7803837953091685, |
| "grad_norm": 0.7514772530157263, |
| "learning_rate": 9.237162002357214e-06, |
| "loss": 0.7025, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.7835820895522388, |
| "grad_norm": 0.7153934193506174, |
| "learning_rate": 9.227228245175127e-06, |
| "loss": 0.7084, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.7867803837953091, |
| "grad_norm": 0.6510532625278841, |
| "learning_rate": 9.217235640121927e-06, |
| "loss": 0.7032, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.7899786780383795, |
| "grad_norm": 0.648491391649832, |
| "learning_rate": 9.207184326306155e-06, |
| "loss": 0.703, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.7931769722814499, |
| "grad_norm": 0.8034299471010478, |
| "learning_rate": 9.197074443653643e-06, |
| "loss": 0.7118, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.7963752665245203, |
| "grad_norm": 0.7868890537454554, |
| "learning_rate": 9.186906132905563e-06, |
| "loss": 0.6907, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.7995735607675906, |
| "grad_norm": 0.7573924459698659, |
| "learning_rate": 9.176679535616477e-06, |
| "loss": 0.7133, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.802771855010661, |
| "grad_norm": 0.7039169561600006, |
| "learning_rate": 9.166394794152363e-06, |
| "loss": 0.6749, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.8059701492537313, |
| "grad_norm": 0.8423685923463412, |
| "learning_rate": 9.156052051688633e-06, |
| "loss": 0.7378, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.8091684434968017, |
| "grad_norm": 0.6131291481922342, |
| "learning_rate": 9.145651452208133e-06, |
| "loss": 0.6415, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.8123667377398721, |
| "grad_norm": 0.7195983825244481, |
| "learning_rate": 9.135193140499155e-06, |
| "loss": 0.6613, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.8155650319829424, |
| "grad_norm": 0.8577511387252152, |
| "learning_rate": 9.124677262153405e-06, |
| "loss": 0.721, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.8187633262260128, |
| "grad_norm": 0.7278841089447043, |
| "learning_rate": 9.114103963563986e-06, |
| "loss": 0.6738, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.8219616204690832, |
| "grad_norm": 0.6856569850641855, |
| "learning_rate": 9.103473391923354e-06, |
| "loss": 0.6588, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.8251599147121536, |
| "grad_norm": 0.9021119379041687, |
| "learning_rate": 9.092785695221271e-06, |
| "loss": 0.7182, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.8283582089552238, |
| "grad_norm": 0.8796683162834094, |
| "learning_rate": 9.08204102224275e-06, |
| "loss": 0.6816, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.8315565031982942, |
| "grad_norm": 0.847520407573671, |
| "learning_rate": 9.071239522565978e-06, |
| "loss": 0.6508, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.8347547974413646, |
| "grad_norm": 0.9049944252517715, |
| "learning_rate": 9.06038134656023e-06, |
| "loss": 0.6925, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.837953091684435, |
| "grad_norm": 0.8571499479329611, |
| "learning_rate": 9.049466645383785e-06, |
| "loss": 0.681, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.8411513859275054, |
| "grad_norm": 0.745522815593341, |
| "learning_rate": 9.038495570981814e-06, |
| "loss": 0.6525, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.8443496801705757, |
| "grad_norm": 0.947292350163867, |
| "learning_rate": 9.027468276084274e-06, |
| "loss": 0.6696, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.847547974413646, |
| "grad_norm": 0.8373012552761833, |
| "learning_rate": 9.016384914203771e-06, |
| "loss": 0.6782, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.8507462686567164, |
| "grad_norm": 0.7211699745049627, |
| "learning_rate": 9.00524563963343e-06, |
| "loss": 0.7015, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.8539445628997868, |
| "grad_norm": 0.973744577195585, |
| "learning_rate": 8.99405060744474e-06, |
| "loss": 0.6598, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.8571428571428571, |
| "grad_norm": 0.7600474078060623, |
| "learning_rate": 8.982799973485407e-06, |
| "loss": 0.6853, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.8603411513859275, |
| "grad_norm": 0.7568774174674711, |
| "learning_rate": 8.971493894377174e-06, |
| "loss": 0.6963, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.8635394456289979, |
| "grad_norm": 1.2096518035065966, |
| "learning_rate": 8.960132527513642e-06, |
| "loss": 0.6901, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.8667377398720683, |
| "grad_norm": 0.8943559219904526, |
| "learning_rate": 8.94871603105809e-06, |
| "loss": 0.6782, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.8699360341151386, |
| "grad_norm": 0.7461346678625066, |
| "learning_rate": 8.937244563941248e-06, |
| "loss": 0.7018, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.8731343283582089, |
| "grad_norm": 1.0000505503295154, |
| "learning_rate": 8.925718285859118e-06, |
| "loss": 0.6875, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.8763326226012793, |
| "grad_norm": 0.7858176236082158, |
| "learning_rate": 8.914137357270723e-06, |
| "loss": 0.7121, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.8795309168443497, |
| "grad_norm": 0.7063872884648656, |
| "learning_rate": 8.902501939395887e-06, |
| "loss": 0.6695, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.8827292110874201, |
| "grad_norm": 0.8044416558628715, |
| "learning_rate": 8.890812194212987e-06, |
| "loss": 0.7076, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.8859275053304904, |
| "grad_norm": 0.6393740328761567, |
| "learning_rate": 8.879068284456702e-06, |
| "loss": 0.6556, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.8891257995735607, |
| "grad_norm": 0.857231208160294, |
| "learning_rate": 8.867270373615735e-06, |
| "loss": 0.7148, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.8923240938166311, |
| "grad_norm": 0.6337872608316819, |
| "learning_rate": 8.855418625930556e-06, |
| "loss": 0.7365, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.8955223880597015, |
| "grad_norm": 0.6706185344174747, |
| "learning_rate": 8.8435132063911e-06, |
| "loss": 0.6699, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.8987206823027718, |
| "grad_norm": 0.7688016478702769, |
| "learning_rate": 8.83155428073448e-06, |
| "loss": 0.6695, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.9019189765458422, |
| "grad_norm": 0.6859750413913875, |
| "learning_rate": 8.81954201544267e-06, |
| "loss": 0.7105, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.9051172707889126, |
| "grad_norm": 0.736049871487562, |
| "learning_rate": 8.8074765777402e-06, |
| "loss": 0.6566, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.908315565031983, |
| "grad_norm": 0.6566455673708244, |
| "learning_rate": 8.79535813559181e-06, |
| "loss": 0.7087, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.9115138592750534, |
| "grad_norm": 0.6678755774132027, |
| "learning_rate": 8.783186857700137e-06, |
| "loss": 0.6803, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.9147121535181236, |
| "grad_norm": 0.6358862703989032, |
| "learning_rate": 8.77096291350334e-06, |
| "loss": 0.6477, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.917910447761194, |
| "grad_norm": 0.621135300076204, |
| "learning_rate": 8.75868647317276e-06, |
| "loss": 0.6853, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.9211087420042644, |
| "grad_norm": 0.8269166477046754, |
| "learning_rate": 8.746357707610544e-06, |
| "loss": 0.7068, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.9243070362473348, |
| "grad_norm": 0.754354903003167, |
| "learning_rate": 8.733976788447265e-06, |
| "loss": 0.6775, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.9275053304904051, |
| "grad_norm": 0.6328527198398567, |
| "learning_rate": 8.721543888039534e-06, |
| "loss": 0.6738, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.9307036247334755, |
| "grad_norm": 0.7230763976135067, |
| "learning_rate": 8.709059179467598e-06, |
| "loss": 0.6448, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.9339019189765458, |
| "grad_norm": 0.6877836548216805, |
| "learning_rate": 8.69652283653294e-06, |
| "loss": 0.6798, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.9371002132196162, |
| "grad_norm": 0.7564591211606494, |
| "learning_rate": 8.683935033755848e-06, |
| "loss": 0.6953, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.9402985074626866, |
| "grad_norm": 0.668332745907526, |
| "learning_rate": 8.671295946372989e-06, |
| "loss": 0.681, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.9434968017057569, |
| "grad_norm": 0.7159527856011911, |
| "learning_rate": 8.658605750334972e-06, |
| "loss": 0.6988, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.9466950959488273, |
| "grad_norm": 0.6848922808640394, |
| "learning_rate": 8.6458646223039e-06, |
| "loss": 0.641, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.9498933901918977, |
| "grad_norm": 0.7548334675392807, |
| "learning_rate": 8.6330727396509e-06, |
| "loss": 0.6713, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.9530916844349681, |
| "grad_norm": 0.5861356337358591, |
| "learning_rate": 8.620230280453672e-06, |
| "loss": 0.691, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.9562899786780383, |
| "grad_norm": 0.7949755015478441, |
| "learning_rate": 8.607337423493996e-06, |
| "loss": 0.6808, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.9594882729211087, |
| "grad_norm": 0.6765008542247006, |
| "learning_rate": 8.594394348255239e-06, |
| "loss": 0.6898, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.9626865671641791, |
| "grad_norm": 0.5806572889436659, |
| "learning_rate": 8.581401234919873e-06, |
| "loss": 0.6492, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.9658848614072495, |
| "grad_norm": 0.7469524089876209, |
| "learning_rate": 8.568358264366958e-06, |
| "loss": 0.6821, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.9690831556503199, |
| "grad_norm": 0.7249833696216659, |
| "learning_rate": 8.555265618169615e-06, |
| "loss": 0.6522, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.9722814498933902, |
| "grad_norm": 0.6218765386847785, |
| "learning_rate": 8.542123478592518e-06, |
| "loss": 0.6843, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.9754797441364605, |
| "grad_norm": 0.7551061996851178, |
| "learning_rate": 8.528932028589337e-06, |
| "loss": 0.6728, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.9786780383795309, |
| "grad_norm": 0.674338314023206, |
| "learning_rate": 8.515691451800206e-06, |
| "loss": 0.6736, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.9818763326226013, |
| "grad_norm": 0.7849066941451676, |
| "learning_rate": 8.502401932549154e-06, |
| "loss": 0.6878, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.9850746268656716, |
| "grad_norm": 0.7498368721683543, |
| "learning_rate": 8.489063655841552e-06, |
| "loss": 0.6903, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.988272921108742, |
| "grad_norm": 0.665126766182557, |
| "learning_rate": 8.475676807361526e-06, |
| "loss": 0.6552, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.9914712153518124, |
| "grad_norm": 0.637731666313635, |
| "learning_rate": 8.462241573469378e-06, |
| "loss": 0.6769, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.9946695095948828, |
| "grad_norm": 0.6112924731775418, |
| "learning_rate": 8.448758141198991e-06, |
| "loss": 0.677, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.997867803837953, |
| "grad_norm": 0.7383946127305492, |
| "learning_rate": 8.435226698255228e-06, |
| "loss": 0.6746, |
| "step": 312 |
| }, |
| { |
| "epoch": 1.0010660980810235, |
| "grad_norm": 0.626462069677627, |
| "learning_rate": 8.421647433011306e-06, |
| "loss": 0.6566, |
| "step": 313 |
| }, |
| { |
| "epoch": 1.004264392324094, |
| "grad_norm": 0.7510688924872979, |
| "learning_rate": 8.408020534506195e-06, |
| "loss": 0.6281, |
| "step": 314 |
| }, |
| { |
| "epoch": 1.007462686567164, |
| "grad_norm": 0.7595637013734888, |
| "learning_rate": 8.394346192441967e-06, |
| "loss": 0.6484, |
| "step": 315 |
| }, |
| { |
| "epoch": 1.0106609808102345, |
| "grad_norm": 0.8191267012599663, |
| "learning_rate": 8.380624597181165e-06, |
| "loss": 0.6269, |
| "step": 316 |
| }, |
| { |
| "epoch": 1.0138592750533049, |
| "grad_norm": 0.6920756752948478, |
| "learning_rate": 8.366855939744152e-06, |
| "loss": 0.6407, |
| "step": 317 |
| }, |
| { |
| "epoch": 1.0170575692963753, |
| "grad_norm": 0.8113121478207607, |
| "learning_rate": 8.353040411806449e-06, |
| "loss": 0.6379, |
| "step": 318 |
| }, |
| { |
| "epoch": 1.0202558635394456, |
| "grad_norm": 0.6038342001520437, |
| "learning_rate": 8.339178205696067e-06, |
| "loss": 0.6219, |
| "step": 319 |
| }, |
| { |
| "epoch": 1.023454157782516, |
| "grad_norm": 0.6525041753408835, |
| "learning_rate": 8.325269514390835e-06, |
| "loss": 0.6451, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.0266524520255864, |
| "grad_norm": 0.7876363756773992, |
| "learning_rate": 8.311314531515707e-06, |
| "loss": 0.6556, |
| "step": 321 |
| }, |
| { |
| "epoch": 1.0298507462686568, |
| "grad_norm": 0.6729418158706656, |
| "learning_rate": 8.297313451340064e-06, |
| "loss": 0.6278, |
| "step": 322 |
| }, |
| { |
| "epoch": 1.033049040511727, |
| "grad_norm": 0.7323640919082973, |
| "learning_rate": 8.283266468775024e-06, |
| "loss": 0.635, |
| "step": 323 |
| }, |
| { |
| "epoch": 1.0362473347547974, |
| "grad_norm": 0.7456586009899282, |
| "learning_rate": 8.269173779370712e-06, |
| "loss": 0.6301, |
| "step": 324 |
| }, |
| { |
| "epoch": 1.0394456289978677, |
| "grad_norm": 0.6472864535960438, |
| "learning_rate": 8.255035579313545e-06, |
| "loss": 0.6514, |
| "step": 325 |
| }, |
| { |
| "epoch": 1.0426439232409381, |
| "grad_norm": 0.7261880506431115, |
| "learning_rate": 8.240852065423507e-06, |
| "loss": 0.6255, |
| "step": 326 |
| }, |
| { |
| "epoch": 1.0458422174840085, |
| "grad_norm": 0.7277147979365199, |
| "learning_rate": 8.226623435151389e-06, |
| "loss": 0.5958, |
| "step": 327 |
| }, |
| { |
| "epoch": 1.049040511727079, |
| "grad_norm": 0.6624168018368254, |
| "learning_rate": 8.21234988657607e-06, |
| "loss": 0.6179, |
| "step": 328 |
| }, |
| { |
| "epoch": 1.0522388059701493, |
| "grad_norm": 0.7453935828883017, |
| "learning_rate": 8.198031618401733e-06, |
| "loss": 0.6108, |
| "step": 329 |
| }, |
| { |
| "epoch": 1.0554371002132197, |
| "grad_norm": 0.7264796554563445, |
| "learning_rate": 8.183668829955111e-06, |
| "loss": 0.6238, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.05863539445629, |
| "grad_norm": 0.7682551654401545, |
| "learning_rate": 8.169261721182715e-06, |
| "loss": 0.6191, |
| "step": 331 |
| }, |
| { |
| "epoch": 1.0618336886993602, |
| "grad_norm": 0.6228417217082994, |
| "learning_rate": 8.154810492648038e-06, |
| "loss": 0.6316, |
| "step": 332 |
| }, |
| { |
| "epoch": 1.0650319829424306, |
| "grad_norm": 0.7327288572841154, |
| "learning_rate": 8.140315345528778e-06, |
| "loss": 0.6586, |
| "step": 333 |
| }, |
| { |
| "epoch": 1.068230277185501, |
| "grad_norm": 0.6964883436081012, |
| "learning_rate": 8.125776481614025e-06, |
| "loss": 0.6414, |
| "step": 334 |
| }, |
| { |
| "epoch": 1.0714285714285714, |
| "grad_norm": 0.6054633221728152, |
| "learning_rate": 8.111194103301461e-06, |
| "loss": 0.5893, |
| "step": 335 |
| }, |
| { |
| "epoch": 1.0746268656716418, |
| "grad_norm": 0.6959078284441366, |
| "learning_rate": 8.096568413594533e-06, |
| "loss": 0.6209, |
| "step": 336 |
| }, |
| { |
| "epoch": 1.0778251599147122, |
| "grad_norm": 0.6687442847144212, |
| "learning_rate": 8.081899616099638e-06, |
| "loss": 0.6395, |
| "step": 337 |
| }, |
| { |
| "epoch": 1.0810234541577826, |
| "grad_norm": 0.6672132910162282, |
| "learning_rate": 8.067187915023283e-06, |
| "loss": 0.6453, |
| "step": 338 |
| }, |
| { |
| "epoch": 1.084221748400853, |
| "grad_norm": 0.7011871375952131, |
| "learning_rate": 8.052433515169235e-06, |
| "loss": 0.6578, |
| "step": 339 |
| }, |
| { |
| "epoch": 1.0874200426439233, |
| "grad_norm": 0.6341715440450284, |
| "learning_rate": 8.037636621935686e-06, |
| "loss": 0.5993, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.0906183368869935, |
| "grad_norm": 0.7942056782846189, |
| "learning_rate": 8.022797441312376e-06, |
| "loss": 0.6377, |
| "step": 341 |
| }, |
| { |
| "epoch": 1.0938166311300639, |
| "grad_norm": 0.6703609675609049, |
| "learning_rate": 8.007916179877742e-06, |
| "loss": 0.6262, |
| "step": 342 |
| }, |
| { |
| "epoch": 1.0970149253731343, |
| "grad_norm": 0.6839776213487608, |
| "learning_rate": 7.99299304479603e-06, |
| "loss": 0.6244, |
| "step": 343 |
| }, |
| { |
| "epoch": 1.1002132196162047, |
| "grad_norm": 0.7336708760979732, |
| "learning_rate": 7.978028243814416e-06, |
| "loss": 0.6607, |
| "step": 344 |
| }, |
| { |
| "epoch": 1.103411513859275, |
| "grad_norm": 0.6632430703502449, |
| "learning_rate": 7.96302198526011e-06, |
| "loss": 0.6556, |
| "step": 345 |
| }, |
| { |
| "epoch": 1.1066098081023454, |
| "grad_norm": 0.695919842154835, |
| "learning_rate": 7.947974478037468e-06, |
| "loss": 0.6194, |
| "step": 346 |
| }, |
| { |
| "epoch": 1.1098081023454158, |
| "grad_norm": 0.654985383307004, |
| "learning_rate": 7.932885931625063e-06, |
| "loss": 0.6015, |
| "step": 347 |
| }, |
| { |
| "epoch": 1.1130063965884862, |
| "grad_norm": 0.7284161828955869, |
| "learning_rate": 7.917756556072792e-06, |
| "loss": 0.6238, |
| "step": 348 |
| }, |
| { |
| "epoch": 1.1162046908315566, |
| "grad_norm": 0.7633948849076417, |
| "learning_rate": 7.902586561998928e-06, |
| "loss": 0.6829, |
| "step": 349 |
| }, |
| { |
| "epoch": 1.1194029850746268, |
| "grad_norm": 0.7808526782166596, |
| "learning_rate": 7.887376160587214e-06, |
| "loss": 0.6286, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.1226012793176972, |
| "grad_norm": 0.7168210695937199, |
| "learning_rate": 7.8721255635839e-06, |
| "loss": 0.655, |
| "step": 351 |
| }, |
| { |
| "epoch": 1.1257995735607675, |
| "grad_norm": 0.7093615169947094, |
| "learning_rate": 7.85683498329481e-06, |
| "loss": 0.6082, |
| "step": 352 |
| }, |
| { |
| "epoch": 1.128997867803838, |
| "grad_norm": 0.7319031414944822, |
| "learning_rate": 7.841504632582378e-06, |
| "loss": 0.6046, |
| "step": 353 |
| }, |
| { |
| "epoch": 1.1321961620469083, |
| "grad_norm": 0.7551618483349399, |
| "learning_rate": 7.826134724862687e-06, |
| "loss": 0.6229, |
| "step": 354 |
| }, |
| { |
| "epoch": 1.1353944562899787, |
| "grad_norm": 0.5826096940443887, |
| "learning_rate": 7.810725474102504e-06, |
| "loss": 0.6491, |
| "step": 355 |
| }, |
| { |
| "epoch": 1.138592750533049, |
| "grad_norm": 0.6417225168262856, |
| "learning_rate": 7.795277094816292e-06, |
| "loss": 0.6261, |
| "step": 356 |
| }, |
| { |
| "epoch": 1.1417910447761195, |
| "grad_norm": 0.6906660028796798, |
| "learning_rate": 7.779789802063229e-06, |
| "loss": 0.6716, |
| "step": 357 |
| }, |
| { |
| "epoch": 1.1449893390191899, |
| "grad_norm": 0.6599388631575054, |
| "learning_rate": 7.764263811444214e-06, |
| "loss": 0.6223, |
| "step": 358 |
| }, |
| { |
| "epoch": 1.14818763326226, |
| "grad_norm": 0.6878046102310988, |
| "learning_rate": 7.748699339098864e-06, |
| "loss": 0.6535, |
| "step": 359 |
| }, |
| { |
| "epoch": 1.1513859275053304, |
| "grad_norm": 0.6983975410121682, |
| "learning_rate": 7.733096601702508e-06, |
| "loss": 0.6175, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.1545842217484008, |
| "grad_norm": 0.7101392030170859, |
| "learning_rate": 7.717455816463161e-06, |
| "loss": 0.6064, |
| "step": 361 |
| }, |
| { |
| "epoch": 1.1577825159914712, |
| "grad_norm": 0.7138468401070005, |
| "learning_rate": 7.70177720111852e-06, |
| "loss": 0.6318, |
| "step": 362 |
| }, |
| { |
| "epoch": 1.1609808102345416, |
| "grad_norm": 0.8020962425971306, |
| "learning_rate": 7.68606097393291e-06, |
| "loss": 0.5551, |
| "step": 363 |
| }, |
| { |
| "epoch": 1.164179104477612, |
| "grad_norm": 0.5838755573325491, |
| "learning_rate": 7.67030735369426e-06, |
| "loss": 0.6681, |
| "step": 364 |
| }, |
| { |
| "epoch": 1.1673773987206824, |
| "grad_norm": 0.7126521508489613, |
| "learning_rate": 7.654516559711053e-06, |
| "loss": 0.6146, |
| "step": 365 |
| }, |
| { |
| "epoch": 1.1705756929637527, |
| "grad_norm": 0.786856054891589, |
| "learning_rate": 7.638688811809274e-06, |
| "loss": 0.592, |
| "step": 366 |
| }, |
| { |
| "epoch": 1.1737739872068231, |
| "grad_norm": 0.6150312500511127, |
| "learning_rate": 7.622824330329345e-06, |
| "loss": 0.6454, |
| "step": 367 |
| }, |
| { |
| "epoch": 1.1769722814498933, |
| "grad_norm": 0.6533779011208625, |
| "learning_rate": 7.6069233361230696e-06, |
| "loss": 0.5972, |
| "step": 368 |
| }, |
| { |
| "epoch": 1.1801705756929637, |
| "grad_norm": 0.7040587378616768, |
| "learning_rate": 7.590986050550542e-06, |
| "loss": 0.6506, |
| "step": 369 |
| }, |
| { |
| "epoch": 1.183368869936034, |
| "grad_norm": 0.6498966757491511, |
| "learning_rate": 7.575012695477076e-06, |
| "loss": 0.6686, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.1865671641791045, |
| "grad_norm": 0.686323024280706, |
| "learning_rate": 7.55900349327012e-06, |
| "loss": 0.6156, |
| "step": 371 |
| }, |
| { |
| "epoch": 1.1897654584221748, |
| "grad_norm": 0.6423904326022212, |
| "learning_rate": 7.542958666796149e-06, |
| "loss": 0.6074, |
| "step": 372 |
| }, |
| { |
| "epoch": 1.1929637526652452, |
| "grad_norm": 0.6178701938146715, |
| "learning_rate": 7.526878439417572e-06, |
| "loss": 0.6224, |
| "step": 373 |
| }, |
| { |
| "epoch": 1.1961620469083156, |
| "grad_norm": 0.6496299199143347, |
| "learning_rate": 7.510763034989616e-06, |
| "loss": 0.6122, |
| "step": 374 |
| }, |
| { |
| "epoch": 1.199360341151386, |
| "grad_norm": 0.6906674571020524, |
| "learning_rate": 7.494612677857218e-06, |
| "loss": 0.5982, |
| "step": 375 |
| }, |
| { |
| "epoch": 1.2025586353944564, |
| "grad_norm": 0.7648845383563382, |
| "learning_rate": 7.478427592851894e-06, |
| "loss": 0.6388, |
| "step": 376 |
| }, |
| { |
| "epoch": 1.2057569296375266, |
| "grad_norm": 0.696212830007392, |
| "learning_rate": 7.462208005288609e-06, |
| "loss": 0.6195, |
| "step": 377 |
| }, |
| { |
| "epoch": 1.208955223880597, |
| "grad_norm": 0.8576480726323676, |
| "learning_rate": 7.44595414096265e-06, |
| "loss": 0.62, |
| "step": 378 |
| }, |
| { |
| "epoch": 1.2121535181236673, |
| "grad_norm": 0.6976210286882664, |
| "learning_rate": 7.429666226146468e-06, |
| "loss": 0.6416, |
| "step": 379 |
| }, |
| { |
| "epoch": 1.2153518123667377, |
| "grad_norm": 0.720435239108808, |
| "learning_rate": 7.413344487586542e-06, |
| "loss": 0.663, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.2185501066098081, |
| "grad_norm": 0.6855025363671802, |
| "learning_rate": 7.396989152500215e-06, |
| "loss": 0.6244, |
| "step": 381 |
| }, |
| { |
| "epoch": 1.2217484008528785, |
| "grad_norm": 0.7874191554629412, |
| "learning_rate": 7.380600448572532e-06, |
| "loss": 0.669, |
| "step": 382 |
| }, |
| { |
| "epoch": 1.224946695095949, |
| "grad_norm": 0.7434853121491252, |
| "learning_rate": 7.364178603953066e-06, |
| "loss": 0.6448, |
| "step": 383 |
| }, |
| { |
| "epoch": 1.2281449893390193, |
| "grad_norm": 0.61487183758893, |
| "learning_rate": 7.347723847252756e-06, |
| "loss": 0.6406, |
| "step": 384 |
| }, |
| { |
| "epoch": 1.2313432835820897, |
| "grad_norm": 0.6917610834136999, |
| "learning_rate": 7.331236407540704e-06, |
| "loss": 0.6311, |
| "step": 385 |
| }, |
| { |
| "epoch": 1.2345415778251598, |
| "grad_norm": 0.7293794438462118, |
| "learning_rate": 7.314716514341007e-06, |
| "loss": 0.6496, |
| "step": 386 |
| }, |
| { |
| "epoch": 1.2377398720682302, |
| "grad_norm": 0.6943306944663377, |
| "learning_rate": 7.298164397629545e-06, |
| "loss": 0.695, |
| "step": 387 |
| }, |
| { |
| "epoch": 1.2409381663113006, |
| "grad_norm": 0.727606577778182, |
| "learning_rate": 7.28158028783079e-06, |
| "loss": 0.6472, |
| "step": 388 |
| }, |
| { |
| "epoch": 1.244136460554371, |
| "grad_norm": 0.6612780887089154, |
| "learning_rate": 7.2649644158145925e-06, |
| "loss": 0.6581, |
| "step": 389 |
| }, |
| { |
| "epoch": 1.2473347547974414, |
| "grad_norm": 0.6796926109344666, |
| "learning_rate": 7.248317012892969e-06, |
| "loss": 0.6455, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.2505330490405118, |
| "grad_norm": 0.7347393619920833, |
| "learning_rate": 7.231638310816888e-06, |
| "loss": 0.6135, |
| "step": 391 |
| }, |
| { |
| "epoch": 1.2537313432835822, |
| "grad_norm": 0.6507765447015836, |
| "learning_rate": 7.214928541773027e-06, |
| "loss": 0.6528, |
| "step": 392 |
| }, |
| { |
| "epoch": 1.2569296375266523, |
| "grad_norm": 0.6378971746133411, |
| "learning_rate": 7.198187938380565e-06, |
| "loss": 0.64, |
| "step": 393 |
| }, |
| { |
| "epoch": 1.260127931769723, |
| "grad_norm": 0.6973995690775606, |
| "learning_rate": 7.1814167336879195e-06, |
| "loss": 0.6372, |
| "step": 394 |
| }, |
| { |
| "epoch": 1.263326226012793, |
| "grad_norm": 0.6611835574299032, |
| "learning_rate": 7.164615161169518e-06, |
| "loss": 0.641, |
| "step": 395 |
| }, |
| { |
| "epoch": 1.2665245202558635, |
| "grad_norm": 0.5638998333261266, |
| "learning_rate": 7.147783454722545e-06, |
| "loss": 0.6191, |
| "step": 396 |
| }, |
| { |
| "epoch": 1.2697228144989339, |
| "grad_norm": 0.5840735647196549, |
| "learning_rate": 7.130921848663678e-06, |
| "loss": 0.6461, |
| "step": 397 |
| }, |
| { |
| "epoch": 1.2729211087420043, |
| "grad_norm": 0.6067150183156588, |
| "learning_rate": 7.1140305777258355e-06, |
| "loss": 0.6534, |
| "step": 398 |
| }, |
| { |
| "epoch": 1.2761194029850746, |
| "grad_norm": 0.5533289520902701, |
| "learning_rate": 7.097109877054906e-06, |
| "loss": 0.6251, |
| "step": 399 |
| }, |
| { |
| "epoch": 1.279317697228145, |
| "grad_norm": 0.6286453359485137, |
| "learning_rate": 7.080159982206471e-06, |
| "loss": 0.6192, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.2825159914712154, |
| "grad_norm": 0.5518718217422934, |
| "learning_rate": 7.06318112914253e-06, |
| "loss": 0.6014, |
| "step": 401 |
| }, |
| { |
| "epoch": 1.2857142857142856, |
| "grad_norm": 0.5849867238933993, |
| "learning_rate": 7.046173554228213e-06, |
| "loss": 0.6131, |
| "step": 402 |
| }, |
| { |
| "epoch": 1.2889125799573562, |
| "grad_norm": 0.6401106522610778, |
| "learning_rate": 7.029137494228491e-06, |
| "loss": 0.6638, |
| "step": 403 |
| }, |
| { |
| "epoch": 1.2921108742004264, |
| "grad_norm": 0.571567700082923, |
| "learning_rate": 7.012073186304885e-06, |
| "loss": 0.6347, |
| "step": 404 |
| }, |
| { |
| "epoch": 1.2953091684434968, |
| "grad_norm": 0.5618388361367395, |
| "learning_rate": 6.994980868012151e-06, |
| "loss": 0.6374, |
| "step": 405 |
| }, |
| { |
| "epoch": 1.2985074626865671, |
| "grad_norm": 0.5861018343702368, |
| "learning_rate": 6.9778607772949894e-06, |
| "loss": 0.6427, |
| "step": 406 |
| }, |
| { |
| "epoch": 1.3017057569296375, |
| "grad_norm": 0.5732316822425729, |
| "learning_rate": 6.9607131524847175e-06, |
| "loss": 0.6153, |
| "step": 407 |
| }, |
| { |
| "epoch": 1.304904051172708, |
| "grad_norm": 0.6579936163217828, |
| "learning_rate": 6.943538232295965e-06, |
| "loss": 0.6822, |
| "step": 408 |
| }, |
| { |
| "epoch": 1.3081023454157783, |
| "grad_norm": 0.6429931495365935, |
| "learning_rate": 6.926336255823341e-06, |
| "loss": 0.6703, |
| "step": 409 |
| }, |
| { |
| "epoch": 1.3113006396588487, |
| "grad_norm": 0.6970899348524371, |
| "learning_rate": 6.909107462538113e-06, |
| "loss": 0.6118, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.3144989339019189, |
| "grad_norm": 0.6680718435346977, |
| "learning_rate": 6.891852092284863e-06, |
| "loss": 0.6485, |
| "step": 411 |
| }, |
| { |
| "epoch": 1.3176972281449895, |
| "grad_norm": 0.6600380168580939, |
| "learning_rate": 6.874570385278161e-06, |
| "loss": 0.6717, |
| "step": 412 |
| }, |
| { |
| "epoch": 1.3208955223880596, |
| "grad_norm": 0.802744507326931, |
| "learning_rate": 6.857262582099209e-06, |
| "loss": 0.6225, |
| "step": 413 |
| }, |
| { |
| "epoch": 1.32409381663113, |
| "grad_norm": 0.5662862687056852, |
| "learning_rate": 6.839928923692505e-06, |
| "loss": 0.6327, |
| "step": 414 |
| }, |
| { |
| "epoch": 1.3272921108742004, |
| "grad_norm": 0.7774960950807415, |
| "learning_rate": 6.822569651362475e-06, |
| "loss": 0.629, |
| "step": 415 |
| }, |
| { |
| "epoch": 1.3304904051172708, |
| "grad_norm": 0.7049572238409981, |
| "learning_rate": 6.805185006770125e-06, |
| "loss": 0.6693, |
| "step": 416 |
| }, |
| { |
| "epoch": 1.3336886993603412, |
| "grad_norm": 0.583155396015153, |
| "learning_rate": 6.787775231929666e-06, |
| "loss": 0.6237, |
| "step": 417 |
| }, |
| { |
| "epoch": 1.3368869936034116, |
| "grad_norm": 0.7111519870233683, |
| "learning_rate": 6.7703405692051585e-06, |
| "loss": 0.6394, |
| "step": 418 |
| }, |
| { |
| "epoch": 1.340085287846482, |
| "grad_norm": 0.5653594640288497, |
| "learning_rate": 6.752881261307125e-06, |
| "loss": 0.6155, |
| "step": 419 |
| }, |
| { |
| "epoch": 1.3432835820895521, |
| "grad_norm": 0.5953314550960257, |
| "learning_rate": 6.735397551289179e-06, |
| "loss": 0.6768, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.3464818763326227, |
| "grad_norm": 0.6173871359335188, |
| "learning_rate": 6.717889682544641e-06, |
| "loss": 0.6524, |
| "step": 421 |
| }, |
| { |
| "epoch": 1.349680170575693, |
| "grad_norm": 0.5904374139677531, |
| "learning_rate": 6.700357898803146e-06, |
| "loss": 0.6171, |
| "step": 422 |
| }, |
| { |
| "epoch": 1.3528784648187633, |
| "grad_norm": 0.6013270780619727, |
| "learning_rate": 6.6828024441272554e-06, |
| "loss": 0.6331, |
| "step": 423 |
| }, |
| { |
| "epoch": 1.3560767590618337, |
| "grad_norm": 0.7234400477846492, |
| "learning_rate": 6.665223562909058e-06, |
| "loss": 0.6048, |
| "step": 424 |
| }, |
| { |
| "epoch": 1.359275053304904, |
| "grad_norm": 0.5513590207931245, |
| "learning_rate": 6.647621499866762e-06, |
| "loss": 0.6315, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.3624733475479744, |
| "grad_norm": 0.6666248996294633, |
| "learning_rate": 6.629996500041299e-06, |
| "loss": 0.6611, |
| "step": 426 |
| }, |
| { |
| "epoch": 1.3656716417910448, |
| "grad_norm": 0.7473392889269375, |
| "learning_rate": 6.612348808792904e-06, |
| "loss": 0.6509, |
| "step": 427 |
| }, |
| { |
| "epoch": 1.3688699360341152, |
| "grad_norm": 0.6178536139476686, |
| "learning_rate": 6.5946786717977026e-06, |
| "loss": 0.5846, |
| "step": 428 |
| }, |
| { |
| "epoch": 1.3720682302771854, |
| "grad_norm": 0.7286227764621949, |
| "learning_rate": 6.576986335044292e-06, |
| "loss": 0.6225, |
| "step": 429 |
| }, |
| { |
| "epoch": 1.375266524520256, |
| "grad_norm": 0.6891105977017067, |
| "learning_rate": 6.5592720448303174e-06, |
| "loss": 0.6247, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.3784648187633262, |
| "grad_norm": 0.548934245482671, |
| "learning_rate": 6.541536047759034e-06, |
| "loss": 0.6446, |
| "step": 431 |
| }, |
| { |
| "epoch": 1.3816631130063965, |
| "grad_norm": 0.6602568691180308, |
| "learning_rate": 6.523778590735892e-06, |
| "loss": 0.6262, |
| "step": 432 |
| }, |
| { |
| "epoch": 1.384861407249467, |
| "grad_norm": 0.6490608935621192, |
| "learning_rate": 6.5059999209650795e-06, |
| "loss": 0.6272, |
| "step": 433 |
| }, |
| { |
| "epoch": 1.3880597014925373, |
| "grad_norm": 0.6052733094339158, |
| "learning_rate": 6.488200285946094e-06, |
| "loss": 0.6625, |
| "step": 434 |
| }, |
| { |
| "epoch": 1.3912579957356077, |
| "grad_norm": 0.7850931175623906, |
| "learning_rate": 6.470379933470296e-06, |
| "loss": 0.6289, |
| "step": 435 |
| }, |
| { |
| "epoch": 1.394456289978678, |
| "grad_norm": 0.6381779692800353, |
| "learning_rate": 6.452539111617454e-06, |
| "loss": 0.6403, |
| "step": 436 |
| }, |
| { |
| "epoch": 1.3976545842217485, |
| "grad_norm": 0.6118616893209208, |
| "learning_rate": 6.434678068752293e-06, |
| "loss": 0.6407, |
| "step": 437 |
| }, |
| { |
| "epoch": 1.4008528784648187, |
| "grad_norm": 0.6834821963612697, |
| "learning_rate": 6.416797053521039e-06, |
| "loss": 0.6274, |
| "step": 438 |
| }, |
| { |
| "epoch": 1.4040511727078893, |
| "grad_norm": 0.6399343872362031, |
| "learning_rate": 6.398896314847954e-06, |
| "loss": 0.5884, |
| "step": 439 |
| }, |
| { |
| "epoch": 1.4072494669509594, |
| "grad_norm": 0.650012682249965, |
| "learning_rate": 6.380976101931879e-06, |
| "loss": 0.5957, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.4104477611940298, |
| "grad_norm": 0.7005173466154051, |
| "learning_rate": 6.363036664242751e-06, |
| "loss": 0.6632, |
| "step": 441 |
| }, |
| { |
| "epoch": 1.4136460554371002, |
| "grad_norm": 0.6793406210914823, |
| "learning_rate": 6.345078251518144e-06, |
| "loss": 0.6597, |
| "step": 442 |
| }, |
| { |
| "epoch": 1.4168443496801706, |
| "grad_norm": 0.6296952630265025, |
| "learning_rate": 6.327101113759783e-06, |
| "loss": 0.6719, |
| "step": 443 |
| }, |
| { |
| "epoch": 1.420042643923241, |
| "grad_norm": 0.6793002098632206, |
| "learning_rate": 6.3091055012300675e-06, |
| "loss": 0.6683, |
| "step": 444 |
| }, |
| { |
| "epoch": 1.4232409381663114, |
| "grad_norm": 0.632818749841171, |
| "learning_rate": 6.291091664448589e-06, |
| "loss": 0.636, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.4264392324093818, |
| "grad_norm": 0.6812898757622275, |
| "learning_rate": 6.273059854188636e-06, |
| "loss": 0.6437, |
| "step": 446 |
| }, |
| { |
| "epoch": 1.429637526652452, |
| "grad_norm": 0.6587981529111426, |
| "learning_rate": 6.25501032147372e-06, |
| "loss": 0.683, |
| "step": 447 |
| }, |
| { |
| "epoch": 1.4328358208955223, |
| "grad_norm": 0.6112506718049283, |
| "learning_rate": 6.236943317574054e-06, |
| "loss": 0.6291, |
| "step": 448 |
| }, |
| { |
| "epoch": 1.4360341151385927, |
| "grad_norm": 0.6816015509553757, |
| "learning_rate": 6.218859094003082e-06, |
| "loss": 0.6389, |
| "step": 449 |
| }, |
| { |
| "epoch": 1.439232409381663, |
| "grad_norm": 0.5678562801569867, |
| "learning_rate": 6.200757902513962e-06, |
| "loss": 0.655, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.4424307036247335, |
| "grad_norm": 0.5811458853598366, |
| "learning_rate": 6.182639995096061e-06, |
| "loss": 0.6762, |
| "step": 451 |
| }, |
| { |
| "epoch": 1.4456289978678039, |
| "grad_norm": 0.7324330200735513, |
| "learning_rate": 6.164505623971458e-06, |
| "loss": 0.6298, |
| "step": 452 |
| }, |
| { |
| "epoch": 1.4488272921108742, |
| "grad_norm": 0.5725759197205573, |
| "learning_rate": 6.146355041591419e-06, |
| "loss": 0.6573, |
| "step": 453 |
| }, |
| { |
| "epoch": 1.4520255863539446, |
| "grad_norm": 0.6093969811734463, |
| "learning_rate": 6.128188500632892e-06, |
| "loss": 0.6226, |
| "step": 454 |
| }, |
| { |
| "epoch": 1.455223880597015, |
| "grad_norm": 0.5642498697251256, |
| "learning_rate": 6.11000625399499e-06, |
| "loss": 0.6047, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.4584221748400852, |
| "grad_norm": 0.5740303890176889, |
| "learning_rate": 6.091808554795462e-06, |
| "loss": 0.6318, |
| "step": 456 |
| }, |
| { |
| "epoch": 1.4616204690831556, |
| "grad_norm": 0.6288307609988142, |
| "learning_rate": 6.073595656367175e-06, |
| "loss": 0.647, |
| "step": 457 |
| }, |
| { |
| "epoch": 1.464818763326226, |
| "grad_norm": 0.5866496634459744, |
| "learning_rate": 6.055367812254592e-06, |
| "loss": 0.6098, |
| "step": 458 |
| }, |
| { |
| "epoch": 1.4680170575692963, |
| "grad_norm": 0.5931796642802533, |
| "learning_rate": 6.037125276210229e-06, |
| "loss": 0.6682, |
| "step": 459 |
| }, |
| { |
| "epoch": 1.4712153518123667, |
| "grad_norm": 0.5952005451682014, |
| "learning_rate": 6.0188683021911394e-06, |
| "loss": 0.6415, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.4744136460554371, |
| "grad_norm": 0.5818037194226144, |
| "learning_rate": 6.000597144355361e-06, |
| "loss": 0.6301, |
| "step": 461 |
| }, |
| { |
| "epoch": 1.4776119402985075, |
| "grad_norm": 0.5536985639025898, |
| "learning_rate": 5.982312057058392e-06, |
| "loss": 0.6189, |
| "step": 462 |
| }, |
| { |
| "epoch": 1.480810234541578, |
| "grad_norm": 0.5574476870925947, |
| "learning_rate": 5.964013294849646e-06, |
| "loss": 0.6513, |
| "step": 463 |
| }, |
| { |
| "epoch": 1.4840085287846483, |
| "grad_norm": 0.6007921638296236, |
| "learning_rate": 5.9457011124689025e-06, |
| "loss": 0.6562, |
| "step": 464 |
| }, |
| { |
| "epoch": 1.4872068230277184, |
| "grad_norm": 0.6484774122884718, |
| "learning_rate": 5.927375764842766e-06, |
| "loss": 0.6415, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.4904051172707888, |
| "grad_norm": 0.5367744850334513, |
| "learning_rate": 5.9090375070811215e-06, |
| "loss": 0.6324, |
| "step": 466 |
| }, |
| { |
| "epoch": 1.4936034115138592, |
| "grad_norm": 0.6330458661332963, |
| "learning_rate": 5.890686594473571e-06, |
| "loss": 0.6773, |
| "step": 467 |
| }, |
| { |
| "epoch": 1.4968017057569296, |
| "grad_norm": 0.6239236549424172, |
| "learning_rate": 5.872323282485889e-06, |
| "loss": 0.62, |
| "step": 468 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 0.7714169311784487, |
| "learning_rate": 5.853947826756465e-06, |
| "loss": 0.635, |
| "step": 469 |
| }, |
| { |
| "epoch": 1.5031982942430704, |
| "grad_norm": 0.6052673944971001, |
| "learning_rate": 5.835560483092743e-06, |
| "loss": 0.6212, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.5063965884861408, |
| "grad_norm": 0.6500878797817035, |
| "learning_rate": 5.8171615074676615e-06, |
| "loss": 0.6235, |
| "step": 471 |
| }, |
| { |
| "epoch": 1.509594882729211, |
| "grad_norm": 0.5806431215575556, |
| "learning_rate": 5.798751156016085e-06, |
| "loss": 0.618, |
| "step": 472 |
| }, |
| { |
| "epoch": 1.5127931769722816, |
| "grad_norm": 0.6174497537734422, |
| "learning_rate": 5.780329685031247e-06, |
| "loss": 0.6218, |
| "step": 473 |
| }, |
| { |
| "epoch": 1.5159914712153517, |
| "grad_norm": 0.6376322903314644, |
| "learning_rate": 5.7618973509611755e-06, |
| "loss": 0.6133, |
| "step": 474 |
| }, |
| { |
| "epoch": 1.5191897654584223, |
| "grad_norm": 0.5842117132744907, |
| "learning_rate": 5.743454410405126e-06, |
| "loss": 0.6135, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.5223880597014925, |
| "grad_norm": 0.7067488456038941, |
| "learning_rate": 5.72500112011001e-06, |
| "loss": 0.6172, |
| "step": 476 |
| }, |
| { |
| "epoch": 1.5255863539445629, |
| "grad_norm": 0.563785993728458, |
| "learning_rate": 5.706537736966814e-06, |
| "loss": 0.598, |
| "step": 477 |
| }, |
| { |
| "epoch": 1.5287846481876333, |
| "grad_norm": 0.6261356329525944, |
| "learning_rate": 5.688064518007036e-06, |
| "loss": 0.6362, |
| "step": 478 |
| }, |
| { |
| "epoch": 1.5319829424307037, |
| "grad_norm": 0.7762542900583126, |
| "learning_rate": 5.669581720399094e-06, |
| "loss": 0.6108, |
| "step": 479 |
| }, |
| { |
| "epoch": 1.535181236673774, |
| "grad_norm": 0.5837610551252048, |
| "learning_rate": 5.651089601444752e-06, |
| "loss": 0.6516, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.5383795309168442, |
| "grad_norm": 0.5716180642723976, |
| "learning_rate": 5.632588418575542e-06, |
| "loss": 0.6345, |
| "step": 481 |
| }, |
| { |
| "epoch": 1.5415778251599148, |
| "grad_norm": 0.5946826841288883, |
| "learning_rate": 5.614078429349172e-06, |
| "loss": 0.5934, |
| "step": 482 |
| }, |
| { |
| "epoch": 1.544776119402985, |
| "grad_norm": 0.6512015058357723, |
| "learning_rate": 5.5955598914459465e-06, |
| "loss": 0.6457, |
| "step": 483 |
| }, |
| { |
| "epoch": 1.5479744136460556, |
| "grad_norm": 0.5539302739317081, |
| "learning_rate": 5.577033062665179e-06, |
| "loss": 0.6234, |
| "step": 484 |
| }, |
| { |
| "epoch": 1.5511727078891258, |
| "grad_norm": 0.5788744632809979, |
| "learning_rate": 5.558498200921597e-06, |
| "loss": 0.5965, |
| "step": 485 |
| }, |
| { |
| "epoch": 1.5543710021321961, |
| "grad_norm": 0.5723934422902696, |
| "learning_rate": 5.53995556424176e-06, |
| "loss": 0.6148, |
| "step": 486 |
| }, |
| { |
| "epoch": 1.5575692963752665, |
| "grad_norm": 0.6217220728990693, |
| "learning_rate": 5.521405410760462e-06, |
| "loss": 0.6306, |
| "step": 487 |
| }, |
| { |
| "epoch": 1.560767590618337, |
| "grad_norm": 0.5798449514904306, |
| "learning_rate": 5.50284799871714e-06, |
| "loss": 0.6523, |
| "step": 488 |
| }, |
| { |
| "epoch": 1.5639658848614073, |
| "grad_norm": 0.5884980910713539, |
| "learning_rate": 5.484283586452279e-06, |
| "loss": 0.6409, |
| "step": 489 |
| }, |
| { |
| "epoch": 1.5671641791044775, |
| "grad_norm": 0.5451220864593269, |
| "learning_rate": 5.465712432403812e-06, |
| "loss": 0.64, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.570362473347548, |
| "grad_norm": 0.6378194747314097, |
| "learning_rate": 5.447134795103531e-06, |
| "loss": 0.6369, |
| "step": 491 |
| }, |
| { |
| "epoch": 1.5735607675906182, |
| "grad_norm": 0.6422794734982141, |
| "learning_rate": 5.428550933173476e-06, |
| "loss": 0.6315, |
| "step": 492 |
| }, |
| { |
| "epoch": 1.5767590618336889, |
| "grad_norm": 0.6165632358558669, |
| "learning_rate": 5.409961105322347e-06, |
| "loss": 0.606, |
| "step": 493 |
| }, |
| { |
| "epoch": 1.579957356076759, |
| "grad_norm": 0.5655676781911688, |
| "learning_rate": 5.391365570341893e-06, |
| "loss": 0.6454, |
| "step": 494 |
| }, |
| { |
| "epoch": 1.5831556503198294, |
| "grad_norm": 0.5938708749925583, |
| "learning_rate": 5.372764587103309e-06, |
| "loss": 0.5788, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.5863539445628998, |
| "grad_norm": 0.6088992599619968, |
| "learning_rate": 5.3541584145536475e-06, |
| "loss": 0.6072, |
| "step": 496 |
| }, |
| { |
| "epoch": 1.5895522388059702, |
| "grad_norm": 0.5784074732476567, |
| "learning_rate": 5.335547311712188e-06, |
| "loss": 0.6217, |
| "step": 497 |
| }, |
| { |
| "epoch": 1.5927505330490406, |
| "grad_norm": 0.6355703007033081, |
| "learning_rate": 5.3169315376668566e-06, |
| "loss": 0.6003, |
| "step": 498 |
| }, |
| { |
| "epoch": 1.5959488272921107, |
| "grad_norm": 0.5969121792808912, |
| "learning_rate": 5.2983113515706045e-06, |
| "loss": 0.6141, |
| "step": 499 |
| }, |
| { |
| "epoch": 1.5991471215351813, |
| "grad_norm": 0.5840423887981219, |
| "learning_rate": 5.279687012637798e-06, |
| "loss": 0.6454, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.6023454157782515, |
| "grad_norm": 0.632640436994868, |
| "learning_rate": 5.2610587801406256e-06, |
| "loss": 0.619, |
| "step": 501 |
| }, |
| { |
| "epoch": 1.6055437100213221, |
| "grad_norm": 0.6215694598661943, |
| "learning_rate": 5.242426913405471e-06, |
| "loss": 0.612, |
| "step": 502 |
| }, |
| { |
| "epoch": 1.6087420042643923, |
| "grad_norm": 0.6102315942715076, |
| "learning_rate": 5.223791671809314e-06, |
| "loss": 0.6246, |
| "step": 503 |
| }, |
| { |
| "epoch": 1.6119402985074627, |
| "grad_norm": 0.6343359621017866, |
| "learning_rate": 5.2051533147761155e-06, |
| "loss": 0.6361, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.615138592750533, |
| "grad_norm": 0.6647629539626027, |
| "learning_rate": 5.186512101773206e-06, |
| "loss": 0.6227, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.6183368869936035, |
| "grad_norm": 0.6372800082536703, |
| "learning_rate": 5.167868292307679e-06, |
| "loss": 0.6478, |
| "step": 506 |
| }, |
| { |
| "epoch": 1.6215351812366738, |
| "grad_norm": 0.5768049939693273, |
| "learning_rate": 5.149222145922765e-06, |
| "loss": 0.6459, |
| "step": 507 |
| }, |
| { |
| "epoch": 1.624733475479744, |
| "grad_norm": 0.625505130556027, |
| "learning_rate": 5.130573922194236e-06, |
| "loss": 0.6587, |
| "step": 508 |
| }, |
| { |
| "epoch": 1.6279317697228146, |
| "grad_norm": 0.6164713574466578, |
| "learning_rate": 5.111923880726779e-06, |
| "loss": 0.631, |
| "step": 509 |
| }, |
| { |
| "epoch": 1.6311300639658848, |
| "grad_norm": 0.6064673645339017, |
| "learning_rate": 5.093272281150383e-06, |
| "loss": 0.6425, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.6343283582089554, |
| "grad_norm": 0.6081052704243078, |
| "learning_rate": 5.074619383116733e-06, |
| "loss": 0.5795, |
| "step": 511 |
| }, |
| { |
| "epoch": 1.6375266524520256, |
| "grad_norm": 0.6412746235926382, |
| "learning_rate": 5.05596544629559e-06, |
| "loss": 0.6341, |
| "step": 512 |
| }, |
| { |
| "epoch": 1.640724946695096, |
| "grad_norm": 0.6297169827461854, |
| "learning_rate": 5.03731073037117e-06, |
| "loss": 0.6413, |
| "step": 513 |
| }, |
| { |
| "epoch": 1.6439232409381663, |
| "grad_norm": 0.5638619759271226, |
| "learning_rate": 5.018655495038542e-06, |
| "loss": 0.642, |
| "step": 514 |
| }, |
| { |
| "epoch": 1.6471215351812367, |
| "grad_norm": 0.556408547120617, |
| "learning_rate": 5e-06, |
| "loss": 0.6713, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.650319829424307, |
| "grad_norm": 0.6515376681950392, |
| "learning_rate": 4.981344504961459e-06, |
| "loss": 0.6701, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.6535181236673773, |
| "grad_norm": 0.5492041340566489, |
| "learning_rate": 4.962689269628832e-06, |
| "loss": 0.6173, |
| "step": 517 |
| }, |
| { |
| "epoch": 1.6567164179104479, |
| "grad_norm": 0.5686773096619224, |
| "learning_rate": 4.944034553704412e-06, |
| "loss": 0.6461, |
| "step": 518 |
| }, |
| { |
| "epoch": 1.659914712153518, |
| "grad_norm": 0.6435263865701101, |
| "learning_rate": 4.9253806168832685e-06, |
| "loss": 0.6123, |
| "step": 519 |
| }, |
| { |
| "epoch": 1.6631130063965884, |
| "grad_norm": 0.5691664452687136, |
| "learning_rate": 4.906727718849619e-06, |
| "loss": 0.6238, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.6663113006396588, |
| "grad_norm": 0.567539010985681, |
| "learning_rate": 4.888076119273223e-06, |
| "loss": 0.6184, |
| "step": 521 |
| }, |
| { |
| "epoch": 1.6695095948827292, |
| "grad_norm": 0.6144632127897085, |
| "learning_rate": 4.8694260778057655e-06, |
| "loss": 0.5979, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.6727078891257996, |
| "grad_norm": 0.5911520451028203, |
| "learning_rate": 4.850777854077235e-06, |
| "loss": 0.6469, |
| "step": 523 |
| }, |
| { |
| "epoch": 1.67590618336887, |
| "grad_norm": 0.5454031941868988, |
| "learning_rate": 4.832131707692322e-06, |
| "loss": 0.6122, |
| "step": 524 |
| }, |
| { |
| "epoch": 1.6791044776119404, |
| "grad_norm": 0.5981940676142077, |
| "learning_rate": 4.813487898226794e-06, |
| "loss": 0.6427, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.6823027718550105, |
| "grad_norm": 0.5547199384817155, |
| "learning_rate": 4.7948466852238844e-06, |
| "loss": 0.6278, |
| "step": 526 |
| }, |
| { |
| "epoch": 1.6855010660980811, |
| "grad_norm": 0.5468385314356093, |
| "learning_rate": 4.7762083281906864e-06, |
| "loss": 0.594, |
| "step": 527 |
| }, |
| { |
| "epoch": 1.6886993603411513, |
| "grad_norm": 0.5456445809907, |
| "learning_rate": 4.757573086594529e-06, |
| "loss": 0.5884, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.6918976545842217, |
| "grad_norm": 0.6646249117314513, |
| "learning_rate": 4.738941219859375e-06, |
| "loss": 0.6306, |
| "step": 529 |
| }, |
| { |
| "epoch": 1.695095948827292, |
| "grad_norm": 0.6292949980039241, |
| "learning_rate": 4.720312987362204e-06, |
| "loss": 0.6619, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.6982942430703625, |
| "grad_norm": 0.6322989923958425, |
| "learning_rate": 4.701688648429399e-06, |
| "loss": 0.6266, |
| "step": 531 |
| }, |
| { |
| "epoch": 1.7014925373134329, |
| "grad_norm": 0.5305082924051039, |
| "learning_rate": 4.683068462333144e-06, |
| "loss": 0.6342, |
| "step": 532 |
| }, |
| { |
| "epoch": 1.7046908315565032, |
| "grad_norm": 0.550929491236182, |
| "learning_rate": 4.6644526882878145e-06, |
| "loss": 0.6392, |
| "step": 533 |
| }, |
| { |
| "epoch": 1.7078891257995736, |
| "grad_norm": 0.6232015245825243, |
| "learning_rate": 4.645841585446356e-06, |
| "loss": 0.6064, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.7110874200426438, |
| "grad_norm": 0.5434413619942932, |
| "learning_rate": 4.6272354128966924e-06, |
| "loss": 0.6501, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.7142857142857144, |
| "grad_norm": 0.5191107189759604, |
| "learning_rate": 4.6086344296581095e-06, |
| "loss": 0.6448, |
| "step": 536 |
| }, |
| { |
| "epoch": 1.7174840085287846, |
| "grad_norm": 0.5434292262769143, |
| "learning_rate": 4.590038894677653e-06, |
| "loss": 0.6074, |
| "step": 537 |
| }, |
| { |
| "epoch": 1.720682302771855, |
| "grad_norm": 0.4655865353721599, |
| "learning_rate": 4.5714490668265245e-06, |
| "loss": 0.6236, |
| "step": 538 |
| }, |
| { |
| "epoch": 1.7238805970149254, |
| "grad_norm": 0.550629570360553, |
| "learning_rate": 4.55286520489647e-06, |
| "loss": 0.6397, |
| "step": 539 |
| }, |
| { |
| "epoch": 1.7270788912579957, |
| "grad_norm": 0.6004187629555796, |
| "learning_rate": 4.534287567596189e-06, |
| "loss": 0.6385, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.7302771855010661, |
| "grad_norm": 0.5176720613714986, |
| "learning_rate": 4.515716413547722e-06, |
| "loss": 0.6429, |
| "step": 541 |
| }, |
| { |
| "epoch": 1.7334754797441365, |
| "grad_norm": 0.5115763781808625, |
| "learning_rate": 4.497152001282861e-06, |
| "loss": 0.6382, |
| "step": 542 |
| }, |
| { |
| "epoch": 1.736673773987207, |
| "grad_norm": 0.5562269449292555, |
| "learning_rate": 4.478594589239539e-06, |
| "loss": 0.6236, |
| "step": 543 |
| }, |
| { |
| "epoch": 1.739872068230277, |
| "grad_norm": 0.5670914639063015, |
| "learning_rate": 4.460044435758241e-06, |
| "loss": 0.6441, |
| "step": 544 |
| }, |
| { |
| "epoch": 1.7430703624733477, |
| "grad_norm": 0.5489747231617312, |
| "learning_rate": 4.441501799078405e-06, |
| "loss": 0.613, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.7462686567164178, |
| "grad_norm": 0.5424103892867449, |
| "learning_rate": 4.4229669373348225e-06, |
| "loss": 0.6253, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.7494669509594882, |
| "grad_norm": 0.5562973456307682, |
| "learning_rate": 4.404440108554055e-06, |
| "loss": 0.6168, |
| "step": 547 |
| }, |
| { |
| "epoch": 1.7526652452025586, |
| "grad_norm": 0.533299067462362, |
| "learning_rate": 4.3859215706508295e-06, |
| "loss": 0.6485, |
| "step": 548 |
| }, |
| { |
| "epoch": 1.755863539445629, |
| "grad_norm": 0.5172786400673339, |
| "learning_rate": 4.3674115814244595e-06, |
| "loss": 0.6323, |
| "step": 549 |
| }, |
| { |
| "epoch": 1.7590618336886994, |
| "grad_norm": 0.5085177296011396, |
| "learning_rate": 4.348910398555249e-06, |
| "loss": 0.5899, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.7622601279317696, |
| "grad_norm": 0.47810924329720716, |
| "learning_rate": 4.330418279600907e-06, |
| "loss": 0.6379, |
| "step": 551 |
| }, |
| { |
| "epoch": 1.7654584221748402, |
| "grad_norm": 0.5321362592336698, |
| "learning_rate": 4.311935481992965e-06, |
| "loss": 0.6151, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.7686567164179103, |
| "grad_norm": 0.5446514716404816, |
| "learning_rate": 4.2934622630331855e-06, |
| "loss": 0.6079, |
| "step": 553 |
| }, |
| { |
| "epoch": 1.771855010660981, |
| "grad_norm": 0.5237047324414964, |
| "learning_rate": 4.274998879889991e-06, |
| "loss": 0.6427, |
| "step": 554 |
| }, |
| { |
| "epoch": 1.775053304904051, |
| "grad_norm": 0.5365142222456774, |
| "learning_rate": 4.2565455895948745e-06, |
| "loss": 0.582, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.7782515991471215, |
| "grad_norm": 0.5308651285729982, |
| "learning_rate": 4.238102649038825e-06, |
| "loss": 0.6575, |
| "step": 556 |
| }, |
| { |
| "epoch": 1.7814498933901919, |
| "grad_norm": 0.5717647379657843, |
| "learning_rate": 4.219670314968754e-06, |
| "loss": 0.62, |
| "step": 557 |
| }, |
| { |
| "epoch": 1.7846481876332623, |
| "grad_norm": 0.5050163481740146, |
| "learning_rate": 4.2012488439839185e-06, |
| "loss": 0.6636, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.7878464818763327, |
| "grad_norm": 0.49227167259915716, |
| "learning_rate": 4.182838492532342e-06, |
| "loss": 0.5945, |
| "step": 559 |
| }, |
| { |
| "epoch": 1.7910447761194028, |
| "grad_norm": 0.5302015513388415, |
| "learning_rate": 4.164439516907258e-06, |
| "loss": 0.6106, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.7942430703624734, |
| "grad_norm": 0.64011360289916, |
| "learning_rate": 4.146052173243538e-06, |
| "loss": 0.6593, |
| "step": 561 |
| }, |
| { |
| "epoch": 1.7974413646055436, |
| "grad_norm": 0.601687625944857, |
| "learning_rate": 4.127676717514114e-06, |
| "loss": 0.6092, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.8006396588486142, |
| "grad_norm": 0.524585479629224, |
| "learning_rate": 4.109313405526433e-06, |
| "loss": 0.6446, |
| "step": 563 |
| }, |
| { |
| "epoch": 1.8038379530916844, |
| "grad_norm": 0.6161148649503772, |
| "learning_rate": 4.090962492918881e-06, |
| "loss": 0.6233, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.8070362473347548, |
| "grad_norm": 0.6062559672756939, |
| "learning_rate": 4.072624235157234e-06, |
| "loss": 0.6623, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.8102345415778252, |
| "grad_norm": 0.5985336173857585, |
| "learning_rate": 4.054298887531099e-06, |
| "loss": 0.582, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.8134328358208955, |
| "grad_norm": 0.5632646612772408, |
| "learning_rate": 4.035986705150355e-06, |
| "loss": 0.6159, |
| "step": 567 |
| }, |
| { |
| "epoch": 1.816631130063966, |
| "grad_norm": 0.7068557752731836, |
| "learning_rate": 4.017687942941609e-06, |
| "loss": 0.6536, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.819829424307036, |
| "grad_norm": 0.5730952893121638, |
| "learning_rate": 3.9994028556446404e-06, |
| "loss": 0.6241, |
| "step": 569 |
| }, |
| { |
| "epoch": 1.8230277185501067, |
| "grad_norm": 0.5175887707114529, |
| "learning_rate": 3.981131697808862e-06, |
| "loss": 0.6627, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.8262260127931769, |
| "grad_norm": 0.5796987142687964, |
| "learning_rate": 3.9628747237897715e-06, |
| "loss": 0.6536, |
| "step": 571 |
| }, |
| { |
| "epoch": 1.8294243070362475, |
| "grad_norm": 0.5271014946968099, |
| "learning_rate": 3.94463218774541e-06, |
| "loss": 0.6467, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.8326226012793176, |
| "grad_norm": 0.5704417597925869, |
| "learning_rate": 3.926404343632826e-06, |
| "loss": 0.6831, |
| "step": 573 |
| }, |
| { |
| "epoch": 1.835820895522388, |
| "grad_norm": 0.5292627931377012, |
| "learning_rate": 3.90819144520454e-06, |
| "loss": 0.6408, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.8390191897654584, |
| "grad_norm": 0.5646881580541779, |
| "learning_rate": 3.889993746005011e-06, |
| "loss": 0.6348, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.8422174840085288, |
| "grad_norm": 0.5401410573662442, |
| "learning_rate": 3.8718114993671086e-06, |
| "loss": 0.5917, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.8454157782515992, |
| "grad_norm": 0.5033737411697443, |
| "learning_rate": 3.853644958408582e-06, |
| "loss": 0.6454, |
| "step": 577 |
| }, |
| { |
| "epoch": 1.8486140724946694, |
| "grad_norm": 0.5317406038132683, |
| "learning_rate": 3.835494376028544e-06, |
| "loss": 0.579, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.85181236673774, |
| "grad_norm": 0.5247944872571422, |
| "learning_rate": 3.817360004903939e-06, |
| "loss": 0.6443, |
| "step": 579 |
| }, |
| { |
| "epoch": 1.8550106609808101, |
| "grad_norm": 0.5255266393479685, |
| "learning_rate": 3.799242097486038e-06, |
| "loss": 0.6548, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.8582089552238807, |
| "grad_norm": 0.5562031814775014, |
| "learning_rate": 3.7811409059969177e-06, |
| "loss": 0.6368, |
| "step": 581 |
| }, |
| { |
| "epoch": 1.861407249466951, |
| "grad_norm": 0.5490844678533507, |
| "learning_rate": 3.7630566824259456e-06, |
| "loss": 0.6001, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.8646055437100213, |
| "grad_norm": 0.5257106510728633, |
| "learning_rate": 3.7449896785262817e-06, |
| "loss": 0.6092, |
| "step": 583 |
| }, |
| { |
| "epoch": 1.8678038379530917, |
| "grad_norm": 0.5410953577384838, |
| "learning_rate": 3.726940145811363e-06, |
| "loss": 0.643, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.871002132196162, |
| "grad_norm": 0.5205935779573823, |
| "learning_rate": 3.708908335551412e-06, |
| "loss": 0.6609, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.8742004264392325, |
| "grad_norm": 0.5715057818226926, |
| "learning_rate": 3.6908944987699346e-06, |
| "loss": 0.6191, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.8773987206823026, |
| "grad_norm": 0.5410333169459866, |
| "learning_rate": 3.67289888624022e-06, |
| "loss": 0.6169, |
| "step": 587 |
| }, |
| { |
| "epoch": 1.8805970149253732, |
| "grad_norm": 0.5321189795175738, |
| "learning_rate": 3.6549217484818576e-06, |
| "loss": 0.5957, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.8837953091684434, |
| "grad_norm": 0.6038607670908029, |
| "learning_rate": 3.6369633357572514e-06, |
| "loss": 0.6112, |
| "step": 589 |
| }, |
| { |
| "epoch": 1.886993603411514, |
| "grad_norm": 0.5382547649391735, |
| "learning_rate": 3.6190238980681235e-06, |
| "loss": 0.6446, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.8901918976545842, |
| "grad_norm": 0.509428705908829, |
| "learning_rate": 3.6011036851520465e-06, |
| "loss": 0.6245, |
| "step": 591 |
| }, |
| { |
| "epoch": 1.8933901918976546, |
| "grad_norm": 0.5331030357626905, |
| "learning_rate": 3.583202946478963e-06, |
| "loss": 0.6667, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.896588486140725, |
| "grad_norm": 0.5957179693199653, |
| "learning_rate": 3.5653219312477085e-06, |
| "loss": 0.6164, |
| "step": 593 |
| }, |
| { |
| "epoch": 1.8997867803837953, |
| "grad_norm": 0.540978235449404, |
| "learning_rate": 3.5474608883825475e-06, |
| "loss": 0.6814, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.9029850746268657, |
| "grad_norm": 0.5609593877190009, |
| "learning_rate": 3.529620066529704e-06, |
| "loss": 0.6337, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.906183368869936, |
| "grad_norm": 0.5759309632996505, |
| "learning_rate": 3.5117997140539073e-06, |
| "loss": 0.637, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.9093816631130065, |
| "grad_norm": 0.5913831985711084, |
| "learning_rate": 3.4940000790349226e-06, |
| "loss": 0.6288, |
| "step": 597 |
| }, |
| { |
| "epoch": 1.9125799573560767, |
| "grad_norm": 0.5075699761691839, |
| "learning_rate": 3.47622140926411e-06, |
| "loss": 0.6173, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.9157782515991473, |
| "grad_norm": 0.6653450535384141, |
| "learning_rate": 3.458463952240967e-06, |
| "loss": 0.6523, |
| "step": 599 |
| }, |
| { |
| "epoch": 1.9189765458422174, |
| "grad_norm": 0.628552652910662, |
| "learning_rate": 3.4407279551696846e-06, |
| "loss": 0.6358, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.9221748400852878, |
| "grad_norm": 0.5333242599508652, |
| "learning_rate": 3.4230136649557087e-06, |
| "loss": 0.6288, |
| "step": 601 |
| }, |
| { |
| "epoch": 1.9253731343283582, |
| "grad_norm": 0.47146898112937263, |
| "learning_rate": 3.4053213282022983e-06, |
| "loss": 0.6596, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.9285714285714286, |
| "grad_norm": 0.6621044867075212, |
| "learning_rate": 3.387651191207097e-06, |
| "loss": 0.5934, |
| "step": 603 |
| }, |
| { |
| "epoch": 1.931769722814499, |
| "grad_norm": 0.5923983943553847, |
| "learning_rate": 3.370003499958703e-06, |
| "loss": 0.6332, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.9349680170575692, |
| "grad_norm": 0.5262532242135438, |
| "learning_rate": 3.352378500133239e-06, |
| "loss": 0.6164, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.9381663113006398, |
| "grad_norm": 0.6174684357894091, |
| "learning_rate": 3.334776437090944e-06, |
| "loss": 0.6276, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.94136460554371, |
| "grad_norm": 0.6066479008889557, |
| "learning_rate": 3.317197555872745e-06, |
| "loss": 0.6061, |
| "step": 607 |
| }, |
| { |
| "epoch": 1.9445628997867805, |
| "grad_norm": 0.5282593114168354, |
| "learning_rate": 3.2996421011968546e-06, |
| "loss": 0.5957, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.9477611940298507, |
| "grad_norm": 0.5428512832531854, |
| "learning_rate": 3.28211031745536e-06, |
| "loss": 0.6274, |
| "step": 609 |
| }, |
| { |
| "epoch": 1.950959488272921, |
| "grad_norm": 0.563963200457514, |
| "learning_rate": 3.264602448710822e-06, |
| "loss": 0.5977, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.9541577825159915, |
| "grad_norm": 0.506718113639374, |
| "learning_rate": 3.2471187386928766e-06, |
| "loss": 0.5975, |
| "step": 611 |
| }, |
| { |
| "epoch": 1.9573560767590619, |
| "grad_norm": 0.5077307814187035, |
| "learning_rate": 3.2296594307948428e-06, |
| "loss": 0.6401, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.9605543710021323, |
| "grad_norm": 0.5128850695519592, |
| "learning_rate": 3.212224768070334e-06, |
| "loss": 0.6381, |
| "step": 613 |
| }, |
| { |
| "epoch": 1.9637526652452024, |
| "grad_norm": 0.5083765825134322, |
| "learning_rate": 3.194814993229878e-06, |
| "loss": 0.62, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.966950959488273, |
| "grad_norm": 0.47099407882432265, |
| "learning_rate": 3.177430348637527e-06, |
| "loss": 0.6125, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.9701492537313432, |
| "grad_norm": 0.5530258695290763, |
| "learning_rate": 3.1600710763074972e-06, |
| "loss": 0.6356, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.9733475479744138, |
| "grad_norm": 0.5347670899530949, |
| "learning_rate": 3.142737417900793e-06, |
| "loss": 0.6611, |
| "step": 617 |
| }, |
| { |
| "epoch": 1.976545842217484, |
| "grad_norm": 0.5188746097889174, |
| "learning_rate": 3.125429614721842e-06, |
| "loss": 0.6185, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.9797441364605544, |
| "grad_norm": 0.47423520536928904, |
| "learning_rate": 3.1081479077151387e-06, |
| "loss": 0.6207, |
| "step": 619 |
| }, |
| { |
| "epoch": 1.9829424307036247, |
| "grad_norm": 0.5191715465473807, |
| "learning_rate": 3.090892537461889e-06, |
| "loss": 0.604, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.9861407249466951, |
| "grad_norm": 0.6101222943327476, |
| "learning_rate": 3.0736637441766594e-06, |
| "loss": 0.6234, |
| "step": 621 |
| }, |
| { |
| "epoch": 1.9893390191897655, |
| "grad_norm": 0.5351791226481845, |
| "learning_rate": 3.056461767704037e-06, |
| "loss": 0.6411, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.9925373134328357, |
| "grad_norm": 0.5193133031919753, |
| "learning_rate": 3.039286847515284e-06, |
| "loss": 0.6175, |
| "step": 623 |
| }, |
| { |
| "epoch": 1.9957356076759063, |
| "grad_norm": 0.5316524549516811, |
| "learning_rate": 3.0221392227050126e-06, |
| "loss": 0.6408, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.9989339019189765, |
| "grad_norm": 0.5093874593326244, |
| "learning_rate": 3.00501913198785e-06, |
| "loss": 0.6261, |
| "step": 625 |
| }, |
| { |
| "epoch": 2.002132196162047, |
| "grad_norm": 0.6163744912473614, |
| "learning_rate": 2.9879268136951163e-06, |
| "loss": 0.5951, |
| "step": 626 |
| }, |
| { |
| "epoch": 2.0053304904051172, |
| "grad_norm": 0.5783808952696761, |
| "learning_rate": 2.970862505771509e-06, |
| "loss": 0.5622, |
| "step": 627 |
| }, |
| { |
| "epoch": 2.008528784648188, |
| "grad_norm": 0.49273447076344123, |
| "learning_rate": 2.953826445771788e-06, |
| "loss": 0.5673, |
| "step": 628 |
| }, |
| { |
| "epoch": 2.011727078891258, |
| "grad_norm": 0.5859845266864249, |
| "learning_rate": 2.9368188708574706e-06, |
| "loss": 0.6246, |
| "step": 629 |
| }, |
| { |
| "epoch": 2.014925373134328, |
| "grad_norm": 0.5611571675265489, |
| "learning_rate": 2.9198400177935303e-06, |
| "loss": 0.6277, |
| "step": 630 |
| }, |
| { |
| "epoch": 2.018123667377399, |
| "grad_norm": 0.5276964961204649, |
| "learning_rate": 2.902890122945096e-06, |
| "loss": 0.5577, |
| "step": 631 |
| }, |
| { |
| "epoch": 2.021321961620469, |
| "grad_norm": 0.525544540323068, |
| "learning_rate": 2.8859694222741653e-06, |
| "loss": 0.6051, |
| "step": 632 |
| }, |
| { |
| "epoch": 2.0245202558635396, |
| "grad_norm": 0.5673683781918863, |
| "learning_rate": 2.869078151336323e-06, |
| "loss": 0.5808, |
| "step": 633 |
| }, |
| { |
| "epoch": 2.0277185501066097, |
| "grad_norm": 0.5589784105831517, |
| "learning_rate": 2.852216545277456e-06, |
| "loss": 0.6106, |
| "step": 634 |
| }, |
| { |
| "epoch": 2.0309168443496803, |
| "grad_norm": 0.5030496312589189, |
| "learning_rate": 2.835384838830481e-06, |
| "loss": 0.5541, |
| "step": 635 |
| }, |
| { |
| "epoch": 2.0341151385927505, |
| "grad_norm": 0.6708437250927518, |
| "learning_rate": 2.8185832663120817e-06, |
| "loss": 0.6271, |
| "step": 636 |
| }, |
| { |
| "epoch": 2.0373134328358207, |
| "grad_norm": 0.5763870321226472, |
| "learning_rate": 2.8018120616194356e-06, |
| "loss": 0.5507, |
| "step": 637 |
| }, |
| { |
| "epoch": 2.0405117270788913, |
| "grad_norm": 0.5752129249821096, |
| "learning_rate": 2.785071458226972e-06, |
| "loss": 0.5978, |
| "step": 638 |
| }, |
| { |
| "epoch": 2.0437100213219614, |
| "grad_norm": 0.5054941801603793, |
| "learning_rate": 2.768361689183113e-06, |
| "loss": 0.6203, |
| "step": 639 |
| }, |
| { |
| "epoch": 2.046908315565032, |
| "grad_norm": 0.5481782008730065, |
| "learning_rate": 2.7516829871070295e-06, |
| "loss": 0.6093, |
| "step": 640 |
| }, |
| { |
| "epoch": 2.050106609808102, |
| "grad_norm": 0.5323921761861825, |
| "learning_rate": 2.735035584185409e-06, |
| "loss": 0.6007, |
| "step": 641 |
| }, |
| { |
| "epoch": 2.053304904051173, |
| "grad_norm": 0.5293554299631367, |
| "learning_rate": 2.718419712169213e-06, |
| "loss": 0.5884, |
| "step": 642 |
| }, |
| { |
| "epoch": 2.056503198294243, |
| "grad_norm": 0.553435984362884, |
| "learning_rate": 2.7018356023704574e-06, |
| "loss": 0.5326, |
| "step": 643 |
| }, |
| { |
| "epoch": 2.0597014925373136, |
| "grad_norm": 0.5139320287446054, |
| "learning_rate": 2.685283485658995e-06, |
| "loss": 0.5639, |
| "step": 644 |
| }, |
| { |
| "epoch": 2.0628997867803838, |
| "grad_norm": 0.49577644020424133, |
| "learning_rate": 2.668763592459297e-06, |
| "loss": 0.5571, |
| "step": 645 |
| }, |
| { |
| "epoch": 2.066098081023454, |
| "grad_norm": 0.5089676714103052, |
| "learning_rate": 2.6522761527472464e-06, |
| "loss": 0.6163, |
| "step": 646 |
| }, |
| { |
| "epoch": 2.0692963752665245, |
| "grad_norm": 0.5593194742250533, |
| "learning_rate": 2.6358213960469357e-06, |
| "loss": 0.5808, |
| "step": 647 |
| }, |
| { |
| "epoch": 2.0724946695095947, |
| "grad_norm": 0.5130651147305723, |
| "learning_rate": 2.6193995514274705e-06, |
| "loss": 0.5953, |
| "step": 648 |
| }, |
| { |
| "epoch": 2.0756929637526653, |
| "grad_norm": 0.5609831284876498, |
| "learning_rate": 2.6030108474997854e-06, |
| "loss": 0.5841, |
| "step": 649 |
| }, |
| { |
| "epoch": 2.0788912579957355, |
| "grad_norm": 0.47312462257776017, |
| "learning_rate": 2.586655512413458e-06, |
| "loss": 0.5967, |
| "step": 650 |
| }, |
| { |
| "epoch": 2.082089552238806, |
| "grad_norm": 0.5001915039170618, |
| "learning_rate": 2.5703337738535324e-06, |
| "loss": 0.5485, |
| "step": 651 |
| }, |
| { |
| "epoch": 2.0852878464818763, |
| "grad_norm": 0.5785964658330373, |
| "learning_rate": 2.554045859037353e-06, |
| "loss": 0.566, |
| "step": 652 |
| }, |
| { |
| "epoch": 2.088486140724947, |
| "grad_norm": 0.5912116290504553, |
| "learning_rate": 2.5377919947113917e-06, |
| "loss": 0.5856, |
| "step": 653 |
| }, |
| { |
| "epoch": 2.091684434968017, |
| "grad_norm": 0.5209304706365177, |
| "learning_rate": 2.521572407148107e-06, |
| "loss": 0.5556, |
| "step": 654 |
| }, |
| { |
| "epoch": 2.094882729211087, |
| "grad_norm": 0.5101895622941909, |
| "learning_rate": 2.505387322142782e-06, |
| "loss": 0.5776, |
| "step": 655 |
| }, |
| { |
| "epoch": 2.098081023454158, |
| "grad_norm": 0.4880680240587064, |
| "learning_rate": 2.4892369650103837e-06, |
| "loss": 0.6056, |
| "step": 656 |
| }, |
| { |
| "epoch": 2.101279317697228, |
| "grad_norm": 0.5514381593453722, |
| "learning_rate": 2.4731215605824304e-06, |
| "loss": 0.6006, |
| "step": 657 |
| }, |
| { |
| "epoch": 2.1044776119402986, |
| "grad_norm": 0.5751639987148548, |
| "learning_rate": 2.4570413332038523e-06, |
| "loss": 0.5665, |
| "step": 658 |
| }, |
| { |
| "epoch": 2.1076759061833688, |
| "grad_norm": 0.5804924762978739, |
| "learning_rate": 2.440996506729881e-06, |
| "loss": 0.5973, |
| "step": 659 |
| }, |
| { |
| "epoch": 2.1108742004264394, |
| "grad_norm": 0.4918030532858259, |
| "learning_rate": 2.4249873045229244e-06, |
| "loss": 0.57, |
| "step": 660 |
| }, |
| { |
| "epoch": 2.1140724946695095, |
| "grad_norm": 0.5243890793969488, |
| "learning_rate": 2.4090139494494596e-06, |
| "loss": 0.5871, |
| "step": 661 |
| }, |
| { |
| "epoch": 2.11727078891258, |
| "grad_norm": 0.4806747676363039, |
| "learning_rate": 2.3930766638769325e-06, |
| "loss": 0.5598, |
| "step": 662 |
| }, |
| { |
| "epoch": 2.1204690831556503, |
| "grad_norm": 0.5175543377119891, |
| "learning_rate": 2.3771756696706553e-06, |
| "loss": 0.6051, |
| "step": 663 |
| }, |
| { |
| "epoch": 2.1236673773987205, |
| "grad_norm": 0.5246309451300202, |
| "learning_rate": 2.3613111881907273e-06, |
| "loss": 0.598, |
| "step": 664 |
| }, |
| { |
| "epoch": 2.126865671641791, |
| "grad_norm": 0.526456441833946, |
| "learning_rate": 2.345483440288947e-06, |
| "loss": 0.547, |
| "step": 665 |
| }, |
| { |
| "epoch": 2.1300639658848612, |
| "grad_norm": 0.47184799877761335, |
| "learning_rate": 2.3296926463057396e-06, |
| "loss": 0.6138, |
| "step": 666 |
| }, |
| { |
| "epoch": 2.133262260127932, |
| "grad_norm": 0.48133254553344923, |
| "learning_rate": 2.313939026067091e-06, |
| "loss": 0.583, |
| "step": 667 |
| }, |
| { |
| "epoch": 2.136460554371002, |
| "grad_norm": 0.5064732476569918, |
| "learning_rate": 2.29822279888148e-06, |
| "loss": 0.5454, |
| "step": 668 |
| }, |
| { |
| "epoch": 2.1396588486140726, |
| "grad_norm": 0.5781888574823206, |
| "learning_rate": 2.2825441835368377e-06, |
| "loss": 0.5712, |
| "step": 669 |
| }, |
| { |
| "epoch": 2.142857142857143, |
| "grad_norm": 0.5978766513888759, |
| "learning_rate": 2.2669033982974946e-06, |
| "loss": 0.5695, |
| "step": 670 |
| }, |
| { |
| "epoch": 2.1460554371002134, |
| "grad_norm": 0.4555096407611019, |
| "learning_rate": 2.2513006609011365e-06, |
| "loss": 0.5555, |
| "step": 671 |
| }, |
| { |
| "epoch": 2.1492537313432836, |
| "grad_norm": 0.4676695942649127, |
| "learning_rate": 2.235736188555787e-06, |
| "loss": 0.5735, |
| "step": 672 |
| }, |
| { |
| "epoch": 2.1524520255863537, |
| "grad_norm": 0.4763343046497065, |
| "learning_rate": 2.2202101979367735e-06, |
| "loss": 0.5955, |
| "step": 673 |
| }, |
| { |
| "epoch": 2.1556503198294243, |
| "grad_norm": 0.4657805538089298, |
| "learning_rate": 2.2047229051837107e-06, |
| "loss": 0.6076, |
| "step": 674 |
| }, |
| { |
| "epoch": 2.1588486140724945, |
| "grad_norm": 0.4838763721788115, |
| "learning_rate": 2.189274525897498e-06, |
| "loss": 0.616, |
| "step": 675 |
| }, |
| { |
| "epoch": 2.162046908315565, |
| "grad_norm": 0.5013682671987199, |
| "learning_rate": 2.173865275137314e-06, |
| "loss": 0.5809, |
| "step": 676 |
| }, |
| { |
| "epoch": 2.1652452025586353, |
| "grad_norm": 0.4730326559560223, |
| "learning_rate": 2.158495367417625e-06, |
| "loss": 0.5758, |
| "step": 677 |
| }, |
| { |
| "epoch": 2.168443496801706, |
| "grad_norm": 0.49708415063251904, |
| "learning_rate": 2.143165016705192e-06, |
| "loss": 0.5859, |
| "step": 678 |
| }, |
| { |
| "epoch": 2.171641791044776, |
| "grad_norm": 0.4952662090916961, |
| "learning_rate": 2.1278744364161007e-06, |
| "loss": 0.5575, |
| "step": 679 |
| }, |
| { |
| "epoch": 2.1748400852878467, |
| "grad_norm": 0.46833442631122313, |
| "learning_rate": 2.1126238394127868e-06, |
| "loss": 0.5974, |
| "step": 680 |
| }, |
| { |
| "epoch": 2.178038379530917, |
| "grad_norm": 0.4714670060045907, |
| "learning_rate": 2.0974134380010726e-06, |
| "loss": 0.5751, |
| "step": 681 |
| }, |
| { |
| "epoch": 2.181236673773987, |
| "grad_norm": 0.4772805496941911, |
| "learning_rate": 2.082243443927212e-06, |
| "loss": 0.5996, |
| "step": 682 |
| }, |
| { |
| "epoch": 2.1844349680170576, |
| "grad_norm": 0.4606017759107387, |
| "learning_rate": 2.0671140683749386e-06, |
| "loss": 0.5817, |
| "step": 683 |
| }, |
| { |
| "epoch": 2.1876332622601278, |
| "grad_norm": 0.4774965126191024, |
| "learning_rate": 2.052025521962534e-06, |
| "loss": 0.5961, |
| "step": 684 |
| }, |
| { |
| "epoch": 2.1908315565031984, |
| "grad_norm": 0.49760796326925766, |
| "learning_rate": 2.03697801473989e-06, |
| "loss": 0.5788, |
| "step": 685 |
| }, |
| { |
| "epoch": 2.1940298507462686, |
| "grad_norm": 0.4871186232939493, |
| "learning_rate": 2.0219717561855857e-06, |
| "loss": 0.5879, |
| "step": 686 |
| }, |
| { |
| "epoch": 2.197228144989339, |
| "grad_norm": 0.464109823915951, |
| "learning_rate": 2.0070069552039722e-06, |
| "loss": 0.6154, |
| "step": 687 |
| }, |
| { |
| "epoch": 2.2004264392324093, |
| "grad_norm": 0.4856649377645587, |
| "learning_rate": 1.992083820122259e-06, |
| "loss": 0.5788, |
| "step": 688 |
| }, |
| { |
| "epoch": 2.20362473347548, |
| "grad_norm": 0.46996237636688565, |
| "learning_rate": 1.9772025586876252e-06, |
| "loss": 0.5953, |
| "step": 689 |
| }, |
| { |
| "epoch": 2.20682302771855, |
| "grad_norm": 0.46859069921217344, |
| "learning_rate": 1.962363378064316e-06, |
| "loss": 0.6083, |
| "step": 690 |
| }, |
| { |
| "epoch": 2.2100213219616203, |
| "grad_norm": 0.5168988401256138, |
| "learning_rate": 1.947566484830765e-06, |
| "loss": 0.5814, |
| "step": 691 |
| }, |
| { |
| "epoch": 2.213219616204691, |
| "grad_norm": 0.5406960885215742, |
| "learning_rate": 1.9328120849767198e-06, |
| "loss": 0.5561, |
| "step": 692 |
| }, |
| { |
| "epoch": 2.216417910447761, |
| "grad_norm": 0.44334448566836304, |
| "learning_rate": 1.9181003839003627e-06, |
| "loss": 0.5927, |
| "step": 693 |
| }, |
| { |
| "epoch": 2.2196162046908317, |
| "grad_norm": 0.4557583490316895, |
| "learning_rate": 1.9034315864054682e-06, |
| "loss": 0.5862, |
| "step": 694 |
| }, |
| { |
| "epoch": 2.222814498933902, |
| "grad_norm": 0.5092922861926251, |
| "learning_rate": 1.8888058966985407e-06, |
| "loss": 0.5668, |
| "step": 695 |
| }, |
| { |
| "epoch": 2.2260127931769724, |
| "grad_norm": 0.5004818455277625, |
| "learning_rate": 1.8742235183859747e-06, |
| "loss": 0.5968, |
| "step": 696 |
| }, |
| { |
| "epoch": 2.2292110874200426, |
| "grad_norm": 0.4949014258712852, |
| "learning_rate": 1.8596846544712233e-06, |
| "loss": 0.622, |
| "step": 697 |
| }, |
| { |
| "epoch": 2.232409381663113, |
| "grad_norm": 0.47178385456983946, |
| "learning_rate": 1.8451895073519643e-06, |
| "loss": 0.5725, |
| "step": 698 |
| }, |
| { |
| "epoch": 2.2356076759061834, |
| "grad_norm": 0.4709195571648972, |
| "learning_rate": 1.8307382788172877e-06, |
| "loss": 0.5991, |
| "step": 699 |
| }, |
| { |
| "epoch": 2.2388059701492535, |
| "grad_norm": 0.4806780947771992, |
| "learning_rate": 1.8163311700448899e-06, |
| "loss": 0.5976, |
| "step": 700 |
| }, |
| { |
| "epoch": 2.242004264392324, |
| "grad_norm": 0.46974071200274314, |
| "learning_rate": 1.8019683815982691e-06, |
| "loss": 0.6091, |
| "step": 701 |
| }, |
| { |
| "epoch": 2.2452025586353943, |
| "grad_norm": 0.45361098939294514, |
| "learning_rate": 1.7876501134239316e-06, |
| "loss": 0.5933, |
| "step": 702 |
| }, |
| { |
| "epoch": 2.248400852878465, |
| "grad_norm": 0.4850717880695125, |
| "learning_rate": 1.7733765648486134e-06, |
| "loss": 0.5597, |
| "step": 703 |
| }, |
| { |
| "epoch": 2.251599147121535, |
| "grad_norm": 0.4692790998035614, |
| "learning_rate": 1.7591479345764972e-06, |
| "loss": 0.6007, |
| "step": 704 |
| }, |
| { |
| "epoch": 2.2547974413646057, |
| "grad_norm": 0.4906873053697701, |
| "learning_rate": 1.7449644206864564e-06, |
| "loss": 0.552, |
| "step": 705 |
| }, |
| { |
| "epoch": 2.257995735607676, |
| "grad_norm": 0.481979324803139, |
| "learning_rate": 1.7308262206292898e-06, |
| "loss": 0.6125, |
| "step": 706 |
| }, |
| { |
| "epoch": 2.2611940298507465, |
| "grad_norm": 0.5016963903077071, |
| "learning_rate": 1.7167335312249766e-06, |
| "loss": 0.6017, |
| "step": 707 |
| }, |
| { |
| "epoch": 2.2643923240938166, |
| "grad_norm": 0.47149916005013454, |
| "learning_rate": 1.7026865486599375e-06, |
| "loss": 0.5869, |
| "step": 708 |
| }, |
| { |
| "epoch": 2.267590618336887, |
| "grad_norm": 0.507514553246801, |
| "learning_rate": 1.6886854684842962e-06, |
| "loss": 0.5926, |
| "step": 709 |
| }, |
| { |
| "epoch": 2.2707889125799574, |
| "grad_norm": 0.49251780946608525, |
| "learning_rate": 1.6747304856091662e-06, |
| "loss": 0.5874, |
| "step": 710 |
| }, |
| { |
| "epoch": 2.2739872068230276, |
| "grad_norm": 0.47492882189499497, |
| "learning_rate": 1.660821794303934e-06, |
| "loss": 0.6, |
| "step": 711 |
| }, |
| { |
| "epoch": 2.277185501066098, |
| "grad_norm": 0.4403913104531341, |
| "learning_rate": 1.6469595881935523e-06, |
| "loss": 0.5673, |
| "step": 712 |
| }, |
| { |
| "epoch": 2.2803837953091683, |
| "grad_norm": 0.5039434479895408, |
| "learning_rate": 1.6331440602558501e-06, |
| "loss": 0.5908, |
| "step": 713 |
| }, |
| { |
| "epoch": 2.283582089552239, |
| "grad_norm": 0.4893793347672981, |
| "learning_rate": 1.6193754028188363e-06, |
| "loss": 0.5757, |
| "step": 714 |
| }, |
| { |
| "epoch": 2.286780383795309, |
| "grad_norm": 0.4833544188283998, |
| "learning_rate": 1.6056538075580342e-06, |
| "loss": 0.6034, |
| "step": 715 |
| }, |
| { |
| "epoch": 2.2899786780383797, |
| "grad_norm": 0.46660784623966395, |
| "learning_rate": 1.591979465493806e-06, |
| "loss": 0.554, |
| "step": 716 |
| }, |
| { |
| "epoch": 2.29317697228145, |
| "grad_norm": 0.4941030437078769, |
| "learning_rate": 1.5783525669886934e-06, |
| "loss": 0.5615, |
| "step": 717 |
| }, |
| { |
| "epoch": 2.29637526652452, |
| "grad_norm": 0.467606067398601, |
| "learning_rate": 1.5647733017447741e-06, |
| "loss": 0.608, |
| "step": 718 |
| }, |
| { |
| "epoch": 2.2995735607675907, |
| "grad_norm": 0.47853133402269155, |
| "learning_rate": 1.5512418588010086e-06, |
| "loss": 0.5894, |
| "step": 719 |
| }, |
| { |
| "epoch": 2.302771855010661, |
| "grad_norm": 0.4972054054110685, |
| "learning_rate": 1.5377584265306222e-06, |
| "loss": 0.5282, |
| "step": 720 |
| }, |
| { |
| "epoch": 2.3059701492537314, |
| "grad_norm": 0.4727141579634826, |
| "learning_rate": 1.5243231926384744e-06, |
| "loss": 0.6045, |
| "step": 721 |
| }, |
| { |
| "epoch": 2.3091684434968016, |
| "grad_norm": 0.46328977747110367, |
| "learning_rate": 1.510936344158448e-06, |
| "loss": 0.6181, |
| "step": 722 |
| }, |
| { |
| "epoch": 2.3123667377398722, |
| "grad_norm": 0.46878962646890604, |
| "learning_rate": 1.4975980674508472e-06, |
| "loss": 0.5932, |
| "step": 723 |
| }, |
| { |
| "epoch": 2.3155650319829424, |
| "grad_norm": 0.46930491695796345, |
| "learning_rate": 1.484308548199796e-06, |
| "loss": 0.5943, |
| "step": 724 |
| }, |
| { |
| "epoch": 2.318763326226013, |
| "grad_norm": 0.4819028591025632, |
| "learning_rate": 1.4710679714106635e-06, |
| "loss": 0.6015, |
| "step": 725 |
| }, |
| { |
| "epoch": 2.321961620469083, |
| "grad_norm": 0.4490778160016328, |
| "learning_rate": 1.4578765214074842e-06, |
| "loss": 0.5885, |
| "step": 726 |
| }, |
| { |
| "epoch": 2.3251599147121533, |
| "grad_norm": 0.439589868568974, |
| "learning_rate": 1.444734381830386e-06, |
| "loss": 0.5752, |
| "step": 727 |
| }, |
| { |
| "epoch": 2.328358208955224, |
| "grad_norm": 0.4595533358265758, |
| "learning_rate": 1.4316417356330441e-06, |
| "loss": 0.5933, |
| "step": 728 |
| }, |
| { |
| "epoch": 2.331556503198294, |
| "grad_norm": 0.46808692540880764, |
| "learning_rate": 1.4185987650801286e-06, |
| "loss": 0.5593, |
| "step": 729 |
| }, |
| { |
| "epoch": 2.3347547974413647, |
| "grad_norm": 0.4422620648261011, |
| "learning_rate": 1.4056056517447637e-06, |
| "loss": 0.5725, |
| "step": 730 |
| }, |
| { |
| "epoch": 2.337953091684435, |
| "grad_norm": 0.47954403115117483, |
| "learning_rate": 1.392662576506007e-06, |
| "loss": 0.5761, |
| "step": 731 |
| }, |
| { |
| "epoch": 2.3411513859275055, |
| "grad_norm": 0.4546489872200875, |
| "learning_rate": 1.3797697195463278e-06, |
| "loss": 0.5871, |
| "step": 732 |
| }, |
| { |
| "epoch": 2.3443496801705757, |
| "grad_norm": 0.4500895428397602, |
| "learning_rate": 1.3669272603491002e-06, |
| "loss": 0.5644, |
| "step": 733 |
| }, |
| { |
| "epoch": 2.3475479744136463, |
| "grad_norm": 0.4805002284443759, |
| "learning_rate": 1.3541353776961035e-06, |
| "loss": 0.5714, |
| "step": 734 |
| }, |
| { |
| "epoch": 2.3507462686567164, |
| "grad_norm": 0.450604856228356, |
| "learning_rate": 1.3413942496650301e-06, |
| "loss": 0.5569, |
| "step": 735 |
| }, |
| { |
| "epoch": 2.3539445628997866, |
| "grad_norm": 0.44449130255853053, |
| "learning_rate": 1.3287040536270135e-06, |
| "loss": 0.5673, |
| "step": 736 |
| }, |
| { |
| "epoch": 2.357142857142857, |
| "grad_norm": 0.4311651330481667, |
| "learning_rate": 1.3160649662441532e-06, |
| "loss": 0.5649, |
| "step": 737 |
| }, |
| { |
| "epoch": 2.3603411513859274, |
| "grad_norm": 0.5481566773492595, |
| "learning_rate": 1.30347716346706e-06, |
| "loss": 0.5805, |
| "step": 738 |
| }, |
| { |
| "epoch": 2.363539445628998, |
| "grad_norm": 0.4694122862360678, |
| "learning_rate": 1.290940820532403e-06, |
| "loss": 0.5862, |
| "step": 739 |
| }, |
| { |
| "epoch": 2.366737739872068, |
| "grad_norm": 0.4808027009258574, |
| "learning_rate": 1.2784561119604683e-06, |
| "loss": 0.5844, |
| "step": 740 |
| }, |
| { |
| "epoch": 2.3699360341151388, |
| "grad_norm": 0.4741021908385686, |
| "learning_rate": 1.266023211552736e-06, |
| "loss": 0.5927, |
| "step": 741 |
| }, |
| { |
| "epoch": 2.373134328358209, |
| "grad_norm": 0.5070129440283249, |
| "learning_rate": 1.2536422923894565e-06, |
| "loss": 0.5989, |
| "step": 742 |
| }, |
| { |
| "epoch": 2.3763326226012795, |
| "grad_norm": 0.49982577127543343, |
| "learning_rate": 1.2413135268272403e-06, |
| "loss": 0.6086, |
| "step": 743 |
| }, |
| { |
| "epoch": 2.3795309168443497, |
| "grad_norm": 0.5171582506065449, |
| "learning_rate": 1.2290370864966623e-06, |
| "loss": 0.5935, |
| "step": 744 |
| }, |
| { |
| "epoch": 2.38272921108742, |
| "grad_norm": 0.5058849568215932, |
| "learning_rate": 1.2168131422998653e-06, |
| "loss": 0.5788, |
| "step": 745 |
| }, |
| { |
| "epoch": 2.3859275053304905, |
| "grad_norm": 0.49576030363447515, |
| "learning_rate": 1.2046418644081904e-06, |
| "loss": 0.6084, |
| "step": 746 |
| }, |
| { |
| "epoch": 2.3891257995735606, |
| "grad_norm": 0.4737787057177936, |
| "learning_rate": 1.192523422259802e-06, |
| "loss": 0.5667, |
| "step": 747 |
| }, |
| { |
| "epoch": 2.3923240938166312, |
| "grad_norm": 0.4628087834904147, |
| "learning_rate": 1.1804579845573288e-06, |
| "loss": 0.6369, |
| "step": 748 |
| }, |
| { |
| "epoch": 2.3955223880597014, |
| "grad_norm": 0.44818958645228585, |
| "learning_rate": 1.1684457192655207e-06, |
| "loss": 0.5785, |
| "step": 749 |
| }, |
| { |
| "epoch": 2.398720682302772, |
| "grad_norm": 0.44830220311328195, |
| "learning_rate": 1.156486793608899e-06, |
| "loss": 0.5932, |
| "step": 750 |
| }, |
| { |
| "epoch": 2.401918976545842, |
| "grad_norm": 0.5032388277593316, |
| "learning_rate": 1.144581374069444e-06, |
| "loss": 0.6266, |
| "step": 751 |
| }, |
| { |
| "epoch": 2.405117270788913, |
| "grad_norm": 0.5016006802472114, |
| "learning_rate": 1.1327296263842653e-06, |
| "loss": 0.5956, |
| "step": 752 |
| }, |
| { |
| "epoch": 2.408315565031983, |
| "grad_norm": 0.4643916700878555, |
| "learning_rate": 1.120931715543299e-06, |
| "loss": 0.5694, |
| "step": 753 |
| }, |
| { |
| "epoch": 2.411513859275053, |
| "grad_norm": 0.44920509828052607, |
| "learning_rate": 1.1091878057870137e-06, |
| "loss": 0.6153, |
| "step": 754 |
| }, |
| { |
| "epoch": 2.4147121535181237, |
| "grad_norm": 0.45686584478530823, |
| "learning_rate": 1.0974980606041152e-06, |
| "loss": 0.5805, |
| "step": 755 |
| }, |
| { |
| "epoch": 2.417910447761194, |
| "grad_norm": 0.46553788908049154, |
| "learning_rate": 1.0858626427292796e-06, |
| "loss": 0.599, |
| "step": 756 |
| }, |
| { |
| "epoch": 2.4211087420042645, |
| "grad_norm": 0.5153605958479428, |
| "learning_rate": 1.074281714140884e-06, |
| "loss": 0.6158, |
| "step": 757 |
| }, |
| { |
| "epoch": 2.4243070362473347, |
| "grad_norm": 0.5316883972134206, |
| "learning_rate": 1.0627554360587533e-06, |
| "loss": 0.6284, |
| "step": 758 |
| }, |
| { |
| "epoch": 2.4275053304904053, |
| "grad_norm": 0.4612691902886173, |
| "learning_rate": 1.0512839689419124e-06, |
| "loss": 0.629, |
| "step": 759 |
| }, |
| { |
| "epoch": 2.4307036247334755, |
| "grad_norm": 0.45681038140195945, |
| "learning_rate": 1.0398674724863584e-06, |
| "loss": 0.5693, |
| "step": 760 |
| }, |
| { |
| "epoch": 2.433901918976546, |
| "grad_norm": 0.48533257603226454, |
| "learning_rate": 1.0285061056228273e-06, |
| "loss": 0.5905, |
| "step": 761 |
| }, |
| { |
| "epoch": 2.4371002132196162, |
| "grad_norm": 0.4746224830922052, |
| "learning_rate": 1.0172000265145938e-06, |
| "loss": 0.6055, |
| "step": 762 |
| }, |
| { |
| "epoch": 2.4402985074626864, |
| "grad_norm": 0.5086026458920127, |
| "learning_rate": 1.0059493925552604e-06, |
| "loss": 0.5744, |
| "step": 763 |
| }, |
| { |
| "epoch": 2.443496801705757, |
| "grad_norm": 0.4514225973264305, |
| "learning_rate": 9.947543603665711e-07, |
| "loss": 0.5964, |
| "step": 764 |
| }, |
| { |
| "epoch": 2.446695095948827, |
| "grad_norm": 0.475954682216827, |
| "learning_rate": 9.836150857962296e-07, |
| "loss": 0.6195, |
| "step": 765 |
| }, |
| { |
| "epoch": 2.449893390191898, |
| "grad_norm": 0.516019394227879, |
| "learning_rate": 9.72531723915726e-07, |
| "loss": 0.5606, |
| "step": 766 |
| }, |
| { |
| "epoch": 2.453091684434968, |
| "grad_norm": 0.4863472899308536, |
| "learning_rate": 9.615044290181863e-07, |
| "loss": 0.5711, |
| "step": 767 |
| }, |
| { |
| "epoch": 2.4562899786780386, |
| "grad_norm": 0.433161350310435, |
| "learning_rate": 9.505333546162171e-07, |
| "loss": 0.61, |
| "step": 768 |
| }, |
| { |
| "epoch": 2.4594882729211087, |
| "grad_norm": 0.4744290062548913, |
| "learning_rate": 9.396186534397711e-07, |
| "loss": 0.5657, |
| "step": 769 |
| }, |
| { |
| "epoch": 2.4626865671641793, |
| "grad_norm": 0.4438296651802267, |
| "learning_rate": 9.287604774340236e-07, |
| "loss": 0.6051, |
| "step": 770 |
| }, |
| { |
| "epoch": 2.4658848614072495, |
| "grad_norm": 0.4527860416616376, |
| "learning_rate": 9.179589777572496e-07, |
| "loss": 0.5531, |
| "step": 771 |
| }, |
| { |
| "epoch": 2.4690831556503197, |
| "grad_norm": 0.4863675854140087, |
| "learning_rate": 9.07214304778729e-07, |
| "loss": 0.5948, |
| "step": 772 |
| }, |
| { |
| "epoch": 2.4722814498933903, |
| "grad_norm": 0.45267687004556684, |
| "learning_rate": 8.965266080766471e-07, |
| "loss": 0.5954, |
| "step": 773 |
| }, |
| { |
| "epoch": 2.4754797441364604, |
| "grad_norm": 0.4577039509757653, |
| "learning_rate": 8.858960364360142e-07, |
| "loss": 0.5749, |
| "step": 774 |
| }, |
| { |
| "epoch": 2.478678038379531, |
| "grad_norm": 0.42098442226808497, |
| "learning_rate": 8.753227378465956e-07, |
| "loss": 0.5503, |
| "step": 775 |
| }, |
| { |
| "epoch": 2.481876332622601, |
| "grad_norm": 0.4593978666976355, |
| "learning_rate": 8.648068595008458e-07, |
| "loss": 0.6078, |
| "step": 776 |
| }, |
| { |
| "epoch": 2.485074626865672, |
| "grad_norm": 0.4859190992732641, |
| "learning_rate": 8.543485477918672e-07, |
| "loss": 0.5579, |
| "step": 777 |
| }, |
| { |
| "epoch": 2.488272921108742, |
| "grad_norm": 0.43344193599128084, |
| "learning_rate": 8.439479483113683e-07, |
| "loss": 0.578, |
| "step": 778 |
| }, |
| { |
| "epoch": 2.4914712153518126, |
| "grad_norm": 0.46299605001742955, |
| "learning_rate": 8.336052058476374e-07, |
| "loss": 0.5936, |
| "step": 779 |
| }, |
| { |
| "epoch": 2.4946695095948828, |
| "grad_norm": 0.48867807045497547, |
| "learning_rate": 8.233204643835235e-07, |
| "loss": 0.5904, |
| "step": 780 |
| }, |
| { |
| "epoch": 2.497867803837953, |
| "grad_norm": 0.49771350857070334, |
| "learning_rate": 8.130938670944377e-07, |
| "loss": 0.6007, |
| "step": 781 |
| }, |
| { |
| "epoch": 2.5010660980810235, |
| "grad_norm": 0.4577972103614504, |
| "learning_rate": 8.029255563463589e-07, |
| "loss": 0.5895, |
| "step": 782 |
| }, |
| { |
| "epoch": 2.5042643923240937, |
| "grad_norm": 0.47989050108275627, |
| "learning_rate": 7.928156736938458e-07, |
| "loss": 0.5949, |
| "step": 783 |
| }, |
| { |
| "epoch": 2.5074626865671643, |
| "grad_norm": 0.4371318457371128, |
| "learning_rate": 7.827643598780748e-07, |
| "loss": 0.5528, |
| "step": 784 |
| }, |
| { |
| "epoch": 2.5106609808102345, |
| "grad_norm": 0.46786863175003857, |
| "learning_rate": 7.72771754824877e-07, |
| "loss": 0.6073, |
| "step": 785 |
| }, |
| { |
| "epoch": 2.5138592750533046, |
| "grad_norm": 0.44519658517690325, |
| "learning_rate": 7.628379976427868e-07, |
| "loss": 0.6135, |
| "step": 786 |
| }, |
| { |
| "epoch": 2.5170575692963753, |
| "grad_norm": 0.44205578681704094, |
| "learning_rate": 7.529632266211112e-07, |
| "loss": 0.5942, |
| "step": 787 |
| }, |
| { |
| "epoch": 2.520255863539446, |
| "grad_norm": 0.4875271600407386, |
| "learning_rate": 7.431475792280018e-07, |
| "loss": 0.5807, |
| "step": 788 |
| }, |
| { |
| "epoch": 2.523454157782516, |
| "grad_norm": 0.4710353166878217, |
| "learning_rate": 7.333911921085418e-07, |
| "loss": 0.6056, |
| "step": 789 |
| }, |
| { |
| "epoch": 2.526652452025586, |
| "grad_norm": 0.48054394292285285, |
| "learning_rate": 7.23694201082843e-07, |
| "loss": 0.5591, |
| "step": 790 |
| }, |
| { |
| "epoch": 2.529850746268657, |
| "grad_norm": 0.4868072363712072, |
| "learning_rate": 7.140567411441529e-07, |
| "loss": 0.5832, |
| "step": 791 |
| }, |
| { |
| "epoch": 2.533049040511727, |
| "grad_norm": 0.4412560252403492, |
| "learning_rate": 7.044789464569817e-07, |
| "loss": 0.5937, |
| "step": 792 |
| }, |
| { |
| "epoch": 2.5362473347547976, |
| "grad_norm": 0.4511415013667377, |
| "learning_rate": 6.94960950355229e-07, |
| "loss": 0.6048, |
| "step": 793 |
| }, |
| { |
| "epoch": 2.5394456289978677, |
| "grad_norm": 0.4454888744137283, |
| "learning_rate": 6.855028853403295e-07, |
| "loss": 0.6137, |
| "step": 794 |
| }, |
| { |
| "epoch": 2.542643923240938, |
| "grad_norm": 0.5107375323074698, |
| "learning_rate": 6.761048830794098e-07, |
| "loss": 0.6191, |
| "step": 795 |
| }, |
| { |
| "epoch": 2.5458422174840085, |
| "grad_norm": 0.46547987418755626, |
| "learning_rate": 6.667670744034498e-07, |
| "loss": 0.5814, |
| "step": 796 |
| }, |
| { |
| "epoch": 2.549040511727079, |
| "grad_norm": 0.4703517207855011, |
| "learning_rate": 6.574895893054711e-07, |
| "loss": 0.5778, |
| "step": 797 |
| }, |
| { |
| "epoch": 2.5522388059701493, |
| "grad_norm": 0.4746163195915809, |
| "learning_rate": 6.482725569387171e-07, |
| "loss": 0.5706, |
| "step": 798 |
| }, |
| { |
| "epoch": 2.5554371002132195, |
| "grad_norm": 0.470907057244819, |
| "learning_rate": 6.391161056148637e-07, |
| "loss": 0.5868, |
| "step": 799 |
| }, |
| { |
| "epoch": 2.55863539445629, |
| "grad_norm": 0.4623234041016843, |
| "learning_rate": 6.300203628022272e-07, |
| "loss": 0.5983, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.5618336886993602, |
| "grad_norm": 0.42562483606080814, |
| "learning_rate": 6.209854551239902e-07, |
| "loss": 0.5927, |
| "step": 801 |
| }, |
| { |
| "epoch": 2.565031982942431, |
| "grad_norm": 0.5128197818499242, |
| "learning_rate": 6.120115083564432e-07, |
| "loss": 0.5741, |
| "step": 802 |
| }, |
| { |
| "epoch": 2.568230277185501, |
| "grad_norm": 0.47049712813520317, |
| "learning_rate": 6.030986474272288e-07, |
| "loss": 0.584, |
| "step": 803 |
| }, |
| { |
| "epoch": 2.571428571428571, |
| "grad_norm": 0.44693735376914034, |
| "learning_rate": 5.942469964136055e-07, |
| "loss": 0.5903, |
| "step": 804 |
| }, |
| { |
| "epoch": 2.574626865671642, |
| "grad_norm": 0.42861119739729087, |
| "learning_rate": 5.854566785407212e-07, |
| "loss": 0.597, |
| "step": 805 |
| }, |
| { |
| "epoch": 2.5778251599147124, |
| "grad_norm": 0.44886124383973774, |
| "learning_rate": 5.767278161798912e-07, |
| "loss": 0.5721, |
| "step": 806 |
| }, |
| { |
| "epoch": 2.5810234541577826, |
| "grad_norm": 0.4202452468317384, |
| "learning_rate": 5.680605308469045e-07, |
| "loss": 0.5828, |
| "step": 807 |
| }, |
| { |
| "epoch": 2.5842217484008527, |
| "grad_norm": 0.4235020132956652, |
| "learning_rate": 5.594549432003244e-07, |
| "loss": 0.5846, |
| "step": 808 |
| }, |
| { |
| "epoch": 2.5874200426439233, |
| "grad_norm": 0.4603963689104671, |
| "learning_rate": 5.509111730398125e-07, |
| "loss": 0.5753, |
| "step": 809 |
| }, |
| { |
| "epoch": 2.5906183368869935, |
| "grad_norm": 0.47640520792175567, |
| "learning_rate": 5.42429339304461e-07, |
| "loss": 0.5813, |
| "step": 810 |
| }, |
| { |
| "epoch": 2.593816631130064, |
| "grad_norm": 0.4964363064529672, |
| "learning_rate": 5.340095600711343e-07, |
| "loss": 0.5483, |
| "step": 811 |
| }, |
| { |
| "epoch": 2.5970149253731343, |
| "grad_norm": 0.4188032394181886, |
| "learning_rate": 5.256519525528254e-07, |
| "loss": 0.6258, |
| "step": 812 |
| }, |
| { |
| "epoch": 2.6002132196162044, |
| "grad_norm": 0.4418308069828843, |
| "learning_rate": 5.173566330970286e-07, |
| "loss": 0.5858, |
| "step": 813 |
| }, |
| { |
| "epoch": 2.603411513859275, |
| "grad_norm": 0.4729321718897582, |
| "learning_rate": 5.091237171841173e-07, |
| "loss": 0.56, |
| "step": 814 |
| }, |
| { |
| "epoch": 2.6066098081023457, |
| "grad_norm": 0.4393226839327768, |
| "learning_rate": 5.009533194257332e-07, |
| "loss": 0.5494, |
| "step": 815 |
| }, |
| { |
| "epoch": 2.609808102345416, |
| "grad_norm": 0.44734542269470695, |
| "learning_rate": 4.92845553563196e-07, |
| "loss": 0.626, |
| "step": 816 |
| }, |
| { |
| "epoch": 2.613006396588486, |
| "grad_norm": 0.4501393477711489, |
| "learning_rate": 4.848005324659144e-07, |
| "loss": 0.5966, |
| "step": 817 |
| }, |
| { |
| "epoch": 2.6162046908315566, |
| "grad_norm": 0.4524819111331228, |
| "learning_rate": 4.768183681298211e-07, |
| "loss": 0.5816, |
| "step": 818 |
| }, |
| { |
| "epoch": 2.6194029850746268, |
| "grad_norm": 0.44348410421643397, |
| "learning_rate": 4.6889917167580903e-07, |
| "loss": 0.5752, |
| "step": 819 |
| }, |
| { |
| "epoch": 2.6226012793176974, |
| "grad_norm": 0.46496313577314546, |
| "learning_rate": 4.6104305334818577e-07, |
| "loss": 0.5894, |
| "step": 820 |
| }, |
| { |
| "epoch": 2.6257995735607675, |
| "grad_norm": 0.45530128557907246, |
| "learning_rate": 4.532501225131408e-07, |
| "loss": 0.628, |
| "step": 821 |
| }, |
| { |
| "epoch": 2.6289978678038377, |
| "grad_norm": 0.4557231787890459, |
| "learning_rate": 4.455204876572172e-07, |
| "loss": 0.5963, |
| "step": 822 |
| }, |
| { |
| "epoch": 2.6321961620469083, |
| "grad_norm": 0.413730075405882, |
| "learning_rate": 4.3785425638580847e-07, |
| "loss": 0.5895, |
| "step": 823 |
| }, |
| { |
| "epoch": 2.635394456289979, |
| "grad_norm": 0.46478458824267105, |
| "learning_rate": 4.3025153542165744e-07, |
| "loss": 0.5628, |
| "step": 824 |
| }, |
| { |
| "epoch": 2.638592750533049, |
| "grad_norm": 0.4660432242750011, |
| "learning_rate": 4.2271243060336976e-07, |
| "loss": 0.5843, |
| "step": 825 |
| }, |
| { |
| "epoch": 2.6417910447761193, |
| "grad_norm": 0.45857321057691036, |
| "learning_rate": 4.1523704688394176e-07, |
| "loss": 0.569, |
| "step": 826 |
| }, |
| { |
| "epoch": 2.64498933901919, |
| "grad_norm": 0.454757225375927, |
| "learning_rate": 4.0782548832929646e-07, |
| "loss": 0.5631, |
| "step": 827 |
| }, |
| { |
| "epoch": 2.64818763326226, |
| "grad_norm": 0.4219569542186656, |
| "learning_rate": 4.0047785811684116e-07, |
| "loss": 0.5923, |
| "step": 828 |
| }, |
| { |
| "epoch": 2.6513859275053306, |
| "grad_norm": 0.407295453366224, |
| "learning_rate": 3.931942585340243e-07, |
| "loss": 0.6345, |
| "step": 829 |
| }, |
| { |
| "epoch": 2.654584221748401, |
| "grad_norm": 0.4422205956943115, |
| "learning_rate": 3.8597479097691626e-07, |
| "loss": 0.6016, |
| "step": 830 |
| }, |
| { |
| "epoch": 2.657782515991471, |
| "grad_norm": 0.4322372658598502, |
| "learning_rate": 3.788195559487956e-07, |
| "loss": 0.5717, |
| "step": 831 |
| }, |
| { |
| "epoch": 2.6609808102345416, |
| "grad_norm": 0.446074402964079, |
| "learning_rate": 3.717286530587483e-07, |
| "loss": 0.5933, |
| "step": 832 |
| }, |
| { |
| "epoch": 2.664179104477612, |
| "grad_norm": 0.44149384007245823, |
| "learning_rate": 3.6470218102028607e-07, |
| "loss": 0.5935, |
| "step": 833 |
| }, |
| { |
| "epoch": 2.6673773987206824, |
| "grad_norm": 0.48606195110523104, |
| "learning_rate": 3.577402376499672e-07, |
| "loss": 0.5632, |
| "step": 834 |
| }, |
| { |
| "epoch": 2.6705756929637525, |
| "grad_norm": 0.45172402428831737, |
| "learning_rate": 3.508429198660379e-07, |
| "loss": 0.6271, |
| "step": 835 |
| }, |
| { |
| "epoch": 2.673773987206823, |
| "grad_norm": 0.4783802058190356, |
| "learning_rate": 3.440103236870823e-07, |
| "loss": 0.5614, |
| "step": 836 |
| }, |
| { |
| "epoch": 2.6769722814498933, |
| "grad_norm": 0.4125951168982884, |
| "learning_rate": 3.372425442306837e-07, |
| "loss": 0.6403, |
| "step": 837 |
| }, |
| { |
| "epoch": 2.680170575692964, |
| "grad_norm": 0.45804236331506576, |
| "learning_rate": 3.3053967571210375e-07, |
| "loss": 0.6235, |
| "step": 838 |
| }, |
| { |
| "epoch": 2.683368869936034, |
| "grad_norm": 0.4481909177996369, |
| "learning_rate": 3.2390181144296815e-07, |
| "loss": 0.6079, |
| "step": 839 |
| }, |
| { |
| "epoch": 2.6865671641791042, |
| "grad_norm": 0.44229637332098864, |
| "learning_rate": 3.1732904382996975e-07, |
| "loss": 0.6046, |
| "step": 840 |
| }, |
| { |
| "epoch": 2.689765458422175, |
| "grad_norm": 0.456497771982713, |
| "learning_rate": 3.108214643735813e-07, |
| "loss": 0.6003, |
| "step": 841 |
| }, |
| { |
| "epoch": 2.6929637526652455, |
| "grad_norm": 0.43916739323489756, |
| "learning_rate": 3.04379163666782e-07, |
| "loss": 0.596, |
| "step": 842 |
| }, |
| { |
| "epoch": 2.6961620469083156, |
| "grad_norm": 0.4833880509304977, |
| "learning_rate": 2.98002231393793e-07, |
| "loss": 0.602, |
| "step": 843 |
| }, |
| { |
| "epoch": 2.699360341151386, |
| "grad_norm": 0.44190751175823845, |
| "learning_rate": 2.916907563288357e-07, |
| "loss": 0.6089, |
| "step": 844 |
| }, |
| { |
| "epoch": 2.7025586353944564, |
| "grad_norm": 0.4409073668802216, |
| "learning_rate": 2.854448263348891e-07, |
| "loss": 0.6085, |
| "step": 845 |
| }, |
| { |
| "epoch": 2.7057569296375266, |
| "grad_norm": 0.44376266623437516, |
| "learning_rate": 2.792645283624712e-07, |
| "loss": 0.5517, |
| "step": 846 |
| }, |
| { |
| "epoch": 2.708955223880597, |
| "grad_norm": 0.44030030505529505, |
| "learning_rate": 2.7314994844842623e-07, |
| "loss": 0.5704, |
| "step": 847 |
| }, |
| { |
| "epoch": 2.7121535181236673, |
| "grad_norm": 0.4811909570823317, |
| "learning_rate": 2.671011717147276e-07, |
| "loss": 0.5887, |
| "step": 848 |
| }, |
| { |
| "epoch": 2.7153518123667375, |
| "grad_norm": 0.46435424416208204, |
| "learning_rate": 2.611182823672931e-07, |
| "loss": 0.5753, |
| "step": 849 |
| }, |
| { |
| "epoch": 2.718550106609808, |
| "grad_norm": 0.45062900256380783, |
| "learning_rate": 2.5520136369481194e-07, |
| "loss": 0.5956, |
| "step": 850 |
| }, |
| { |
| "epoch": 2.7217484008528787, |
| "grad_norm": 0.4373848417355698, |
| "learning_rate": 2.493504980675865e-07, |
| "loss": 0.5725, |
| "step": 851 |
| }, |
| { |
| "epoch": 2.724946695095949, |
| "grad_norm": 0.41456459520431993, |
| "learning_rate": 2.4356576693638555e-07, |
| "loss": 0.5989, |
| "step": 852 |
| }, |
| { |
| "epoch": 2.728144989339019, |
| "grad_norm": 0.45641172841307615, |
| "learning_rate": 2.3784725083130678e-07, |
| "loss": 0.5394, |
| "step": 853 |
| }, |
| { |
| "epoch": 2.7313432835820897, |
| "grad_norm": 0.41453418124512204, |
| "learning_rate": 2.3219502936066228e-07, |
| "loss": 0.5822, |
| "step": 854 |
| }, |
| { |
| "epoch": 2.73454157782516, |
| "grad_norm": 0.47178126843622886, |
| "learning_rate": 2.266091812098642e-07, |
| "loss": 0.6153, |
| "step": 855 |
| }, |
| { |
| "epoch": 2.7377398720682304, |
| "grad_norm": 0.415357561340448, |
| "learning_rate": 2.210897841403331e-07, |
| "loss": 0.6319, |
| "step": 856 |
| }, |
| { |
| "epoch": 2.7409381663113006, |
| "grad_norm": 0.429713391850993, |
| "learning_rate": 2.1563691498841465e-07, |
| "loss": 0.6076, |
| "step": 857 |
| }, |
| { |
| "epoch": 2.7441364605543708, |
| "grad_norm": 0.4364411910317568, |
| "learning_rate": 2.1025064966430697e-07, |
| "loss": 0.5809, |
| "step": 858 |
| }, |
| { |
| "epoch": 2.7473347547974414, |
| "grad_norm": 0.4438187825256016, |
| "learning_rate": 2.0493106315100987e-07, |
| "loss": 0.6077, |
| "step": 859 |
| }, |
| { |
| "epoch": 2.750533049040512, |
| "grad_norm": 0.4396721972544317, |
| "learning_rate": 1.9967822950327453e-07, |
| "loss": 0.5649, |
| "step": 860 |
| }, |
| { |
| "epoch": 2.753731343283582, |
| "grad_norm": 0.4087723994643433, |
| "learning_rate": 1.944922218465778e-07, |
| "loss": 0.5983, |
| "step": 861 |
| }, |
| { |
| "epoch": 2.7569296375266523, |
| "grad_norm": 0.45839789417365656, |
| "learning_rate": 1.8937311237610168e-07, |
| "loss": 0.5666, |
| "step": 862 |
| }, |
| { |
| "epoch": 2.760127931769723, |
| "grad_norm": 0.4623951503889097, |
| "learning_rate": 1.8432097235572655e-07, |
| "loss": 0.5814, |
| "step": 863 |
| }, |
| { |
| "epoch": 2.763326226012793, |
| "grad_norm": 0.42397071107319473, |
| "learning_rate": 1.793358721170435e-07, |
| "loss": 0.6227, |
| "step": 864 |
| }, |
| { |
| "epoch": 2.7665245202558637, |
| "grad_norm": 0.46632194080900585, |
| "learning_rate": 1.7441788105837133e-07, |
| "loss": 0.5687, |
| "step": 865 |
| }, |
| { |
| "epoch": 2.769722814498934, |
| "grad_norm": 0.42383172618691023, |
| "learning_rate": 1.6956706764379438e-07, |
| "loss": 0.5571, |
| "step": 866 |
| }, |
| { |
| "epoch": 2.772921108742004, |
| "grad_norm": 0.42444537413289984, |
| "learning_rate": 1.6478349940220294e-07, |
| "loss": 0.5672, |
| "step": 867 |
| }, |
| { |
| "epoch": 2.7761194029850746, |
| "grad_norm": 0.442713023638936, |
| "learning_rate": 1.6006724292636166e-07, |
| "loss": 0.6251, |
| "step": 868 |
| }, |
| { |
| "epoch": 2.7793176972281453, |
| "grad_norm": 0.44882872466351875, |
| "learning_rate": 1.5541836387197528e-07, |
| "loss": 0.6309, |
| "step": 869 |
| }, |
| { |
| "epoch": 2.7825159914712154, |
| "grad_norm": 0.4426652587385554, |
| "learning_rate": 1.508369269567783e-07, |
| "loss": 0.6038, |
| "step": 870 |
| }, |
| { |
| "epoch": 2.7857142857142856, |
| "grad_norm": 0.43159736313225455, |
| "learning_rate": 1.4632299595963294e-07, |
| "loss": 0.6136, |
| "step": 871 |
| }, |
| { |
| "epoch": 2.788912579957356, |
| "grad_norm": 0.4328150604393648, |
| "learning_rate": 1.418766337196431e-07, |
| "loss": 0.5821, |
| "step": 872 |
| }, |
| { |
| "epoch": 2.7921108742004264, |
| "grad_norm": 0.4510225598844259, |
| "learning_rate": 1.374979021352757e-07, |
| "loss": 0.544, |
| "step": 873 |
| }, |
| { |
| "epoch": 2.795309168443497, |
| "grad_norm": 0.42414299491852264, |
| "learning_rate": 1.3318686216350241e-07, |
| "loss": 0.5917, |
| "step": 874 |
| }, |
| { |
| "epoch": 2.798507462686567, |
| "grad_norm": 0.4491433631888564, |
| "learning_rate": 1.2894357381894984e-07, |
| "loss": 0.5709, |
| "step": 875 |
| }, |
| { |
| "epoch": 2.8017057569296373, |
| "grad_norm": 0.435024650739532, |
| "learning_rate": 1.2476809617306408e-07, |
| "loss": 0.5768, |
| "step": 876 |
| }, |
| { |
| "epoch": 2.804904051172708, |
| "grad_norm": 0.4376084724071413, |
| "learning_rate": 1.206604873532885e-07, |
| "loss": 0.5747, |
| "step": 877 |
| }, |
| { |
| "epoch": 2.8081023454157785, |
| "grad_norm": 0.4430099826070007, |
| "learning_rate": 1.166208045422551e-07, |
| "loss": 0.5671, |
| "step": 878 |
| }, |
| { |
| "epoch": 2.8113006396588487, |
| "grad_norm": 0.45821945748201703, |
| "learning_rate": 1.1264910397698614e-07, |
| "loss": 0.5801, |
| "step": 879 |
| }, |
| { |
| "epoch": 2.814498933901919, |
| "grad_norm": 0.42968896616005114, |
| "learning_rate": 1.0874544094811424e-07, |
| "loss": 0.609, |
| "step": 880 |
| }, |
| { |
| "epoch": 2.8176972281449895, |
| "grad_norm": 0.4392112009989005, |
| "learning_rate": 1.0490986979911189e-07, |
| "loss": 0.6004, |
| "step": 881 |
| }, |
| { |
| "epoch": 2.8208955223880596, |
| "grad_norm": 0.4678179782402967, |
| "learning_rate": 1.0114244392553318e-07, |
| "loss": 0.5799, |
| "step": 882 |
| }, |
| { |
| "epoch": 2.8240938166311302, |
| "grad_norm": 0.4649676388307849, |
| "learning_rate": 9.744321577427218e-08, |
| "loss": 0.5694, |
| "step": 883 |
| }, |
| { |
| "epoch": 2.8272921108742004, |
| "grad_norm": 0.4410645863970879, |
| "learning_rate": 9.381223684283291e-08, |
| "loss": 0.5792, |
| "step": 884 |
| }, |
| { |
| "epoch": 2.8304904051172706, |
| "grad_norm": 0.41344134456514686, |
| "learning_rate": 9.024955767861054e-08, |
| "loss": 0.6179, |
| "step": 885 |
| }, |
| { |
| "epoch": 2.833688699360341, |
| "grad_norm": 0.45207918900612454, |
| "learning_rate": 8.675522787819023e-08, |
| "loss": 0.5765, |
| "step": 886 |
| }, |
| { |
| "epoch": 2.836886993603412, |
| "grad_norm": 0.4815854246181784, |
| "learning_rate": 8.332929608665553e-08, |
| "loss": 0.5622, |
| "step": 887 |
| }, |
| { |
| "epoch": 2.840085287846482, |
| "grad_norm": 0.4104529223314507, |
| "learning_rate": 7.997180999691101e-08, |
| "loss": 0.6061, |
| "step": 888 |
| }, |
| { |
| "epoch": 2.843283582089552, |
| "grad_norm": 0.43863504264752207, |
| "learning_rate": 7.668281634901686e-08, |
| "loss": 0.6205, |
| "step": 889 |
| }, |
| { |
| "epoch": 2.8464818763326227, |
| "grad_norm": 0.42107787916551426, |
| "learning_rate": 7.346236092954318e-08, |
| "loss": 0.6112, |
| "step": 890 |
| }, |
| { |
| "epoch": 2.849680170575693, |
| "grad_norm": 0.45281531695898647, |
| "learning_rate": 7.031048857092604e-08, |
| "loss": 0.5897, |
| "step": 891 |
| }, |
| { |
| "epoch": 2.8528784648187635, |
| "grad_norm": 0.4461369389804025, |
| "learning_rate": 6.722724315084805e-08, |
| "loss": 0.5908, |
| "step": 892 |
| }, |
| { |
| "epoch": 2.8560767590618337, |
| "grad_norm": 0.4523838631460964, |
| "learning_rate": 6.421266759162659e-08, |
| "loss": 0.6059, |
| "step": 893 |
| }, |
| { |
| "epoch": 2.859275053304904, |
| "grad_norm": 0.4201076758194753, |
| "learning_rate": 6.12668038596137e-08, |
| "loss": 0.6017, |
| "step": 894 |
| }, |
| { |
| "epoch": 2.8624733475479744, |
| "grad_norm": 0.4576782992482849, |
| "learning_rate": 5.838969296461605e-08, |
| "loss": 0.5595, |
| "step": 895 |
| }, |
| { |
| "epoch": 2.8656716417910446, |
| "grad_norm": 0.452789058187118, |
| "learning_rate": 5.5581374959320366e-08, |
| "loss": 0.6069, |
| "step": 896 |
| }, |
| { |
| "epoch": 2.868869936034115, |
| "grad_norm": 0.4502806418223326, |
| "learning_rate": 5.2841888938738314e-08, |
| "loss": 0.595, |
| "step": 897 |
| }, |
| { |
| "epoch": 2.8720682302771854, |
| "grad_norm": 0.4291345074640387, |
| "learning_rate": 5.017127303966085e-08, |
| "loss": 0.5737, |
| "step": 898 |
| }, |
| { |
| "epoch": 2.875266524520256, |
| "grad_norm": 0.4275943368139046, |
| "learning_rate": 4.7569564440128055e-08, |
| "loss": 0.5802, |
| "step": 899 |
| }, |
| { |
| "epoch": 2.878464818763326, |
| "grad_norm": 0.4783054811845979, |
| "learning_rate": 4.50367993589107e-08, |
| "loss": 0.5523, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.8816631130063968, |
| "grad_norm": 0.4207771911644098, |
| "learning_rate": 4.257301305500672e-08, |
| "loss": 0.6295, |
| "step": 901 |
| }, |
| { |
| "epoch": 2.884861407249467, |
| "grad_norm": 0.4709474401474473, |
| "learning_rate": 4.0178239827151077e-08, |
| "loss": 0.5824, |
| "step": 902 |
| }, |
| { |
| "epoch": 2.888059701492537, |
| "grad_norm": 0.47155045759162584, |
| "learning_rate": 3.785251301333726e-08, |
| "loss": 0.594, |
| "step": 903 |
| }, |
| { |
| "epoch": 2.8912579957356077, |
| "grad_norm": 0.4456059748610983, |
| "learning_rate": 3.559586499035206e-08, |
| "loss": 0.5662, |
| "step": 904 |
| }, |
| { |
| "epoch": 2.894456289978678, |
| "grad_norm": 0.4575365472232499, |
| "learning_rate": 3.340832717332765e-08, |
| "loss": 0.596, |
| "step": 905 |
| }, |
| { |
| "epoch": 2.8976545842217485, |
| "grad_norm": 0.45897611846221986, |
| "learning_rate": 3.128993001530245e-08, |
| "loss": 0.5467, |
| "step": 906 |
| }, |
| { |
| "epoch": 2.9008528784648187, |
| "grad_norm": 0.46086480691517584, |
| "learning_rate": 2.9240703006797044e-08, |
| "loss": 0.591, |
| "step": 907 |
| }, |
| { |
| "epoch": 2.9040511727078893, |
| "grad_norm": 0.4578137617838968, |
| "learning_rate": 2.7260674675404498e-08, |
| "loss": 0.5807, |
| "step": 908 |
| }, |
| { |
| "epoch": 2.9072494669509594, |
| "grad_norm": 0.4744681575485193, |
| "learning_rate": 2.5349872585392898e-08, |
| "loss": 0.5882, |
| "step": 909 |
| }, |
| { |
| "epoch": 2.91044776119403, |
| "grad_norm": 0.43778348787470217, |
| "learning_rate": 2.3508323337321225e-08, |
| "loss": 0.555, |
| "step": 910 |
| }, |
| { |
| "epoch": 2.9136460554371, |
| "grad_norm": 0.43656355895512294, |
| "learning_rate": 2.1736052567670195e-08, |
| "loss": 0.5777, |
| "step": 911 |
| }, |
| { |
| "epoch": 2.9168443496801704, |
| "grad_norm": 0.4664832575605358, |
| "learning_rate": 2.0033084948483104e-08, |
| "loss": 0.5442, |
| "step": 912 |
| }, |
| { |
| "epoch": 2.920042643923241, |
| "grad_norm": 0.438143790231801, |
| "learning_rate": 1.8399444187024995e-08, |
| "loss": 0.5745, |
| "step": 913 |
| }, |
| { |
| "epoch": 2.923240938166311, |
| "grad_norm": 0.42188051822462064, |
| "learning_rate": 1.6835153025451246e-08, |
| "loss": 0.6018, |
| "step": 914 |
| }, |
| { |
| "epoch": 2.9264392324093818, |
| "grad_norm": 0.41865517546309167, |
| "learning_rate": 1.534023324049061e-08, |
| "loss": 0.5891, |
| "step": 915 |
| }, |
| { |
| "epoch": 2.929637526652452, |
| "grad_norm": 0.41837714838468193, |
| "learning_rate": 1.3914705643143788e-08, |
| "loss": 0.6085, |
| "step": 916 |
| }, |
| { |
| "epoch": 2.9328358208955225, |
| "grad_norm": 0.465979632617469, |
| "learning_rate": 1.2558590078390886e-08, |
| "loss": 0.5841, |
| "step": 917 |
| }, |
| { |
| "epoch": 2.9360341151385927, |
| "grad_norm": 0.45654976609627784, |
| "learning_rate": 1.1271905424918294e-08, |
| "loss": 0.6035, |
| "step": 918 |
| }, |
| { |
| "epoch": 2.9392324093816633, |
| "grad_norm": 0.45629479117279054, |
| "learning_rate": 1.0054669594853905e-08, |
| "loss": 0.5988, |
| "step": 919 |
| }, |
| { |
| "epoch": 2.9424307036247335, |
| "grad_norm": 0.44536389990804115, |
| "learning_rate": 8.906899533517866e-09, |
| "loss": 0.6208, |
| "step": 920 |
| }, |
| { |
| "epoch": 2.9456289978678036, |
| "grad_norm": 0.4513455406012691, |
| "learning_rate": 7.828611219187765e-09, |
| "loss": 0.6199, |
| "step": 921 |
| }, |
| { |
| "epoch": 2.9488272921108742, |
| "grad_norm": 0.43738097436145257, |
| "learning_rate": 6.819819662874372e-09, |
| "loss": 0.5942, |
| "step": 922 |
| }, |
| { |
| "epoch": 2.9520255863539444, |
| "grad_norm": 0.45522291904606826, |
| "learning_rate": 5.88053890811513e-09, |
| "loss": 0.6035, |
| "step": 923 |
| }, |
| { |
| "epoch": 2.955223880597015, |
| "grad_norm": 0.4521268106702668, |
| "learning_rate": 5.0107820307770945e-09, |
| "loss": 0.6053, |
| "step": 924 |
| }, |
| { |
| "epoch": 2.958422174840085, |
| "grad_norm": 0.4313674140155561, |
| "learning_rate": 4.210561138873193e-09, |
| "loss": 0.58, |
| "step": 925 |
| }, |
| { |
| "epoch": 2.961620469083156, |
| "grad_norm": 0.4389755845527305, |
| "learning_rate": 3.4798873723984604e-09, |
| "loss": 0.6198, |
| "step": 926 |
| }, |
| { |
| "epoch": 2.964818763326226, |
| "grad_norm": 0.4358918838142038, |
| "learning_rate": 2.818770903170176e-09, |
| "loss": 0.6201, |
| "step": 927 |
| }, |
| { |
| "epoch": 2.9680170575692966, |
| "grad_norm": 0.446788281214219, |
| "learning_rate": 2.2272209346885233e-09, |
| "loss": 0.5517, |
| "step": 928 |
| }, |
| { |
| "epoch": 2.9712153518123667, |
| "grad_norm": 0.4627514418027653, |
| "learning_rate": 1.7052457020089175e-09, |
| "loss": 0.602, |
| "step": 929 |
| }, |
| { |
| "epoch": 2.974413646055437, |
| "grad_norm": 0.46487447660834663, |
| "learning_rate": 1.2528524716259872e-09, |
| "loss": 0.5523, |
| "step": 930 |
| }, |
| { |
| "epoch": 2.9776119402985075, |
| "grad_norm": 0.4291177591260577, |
| "learning_rate": 8.700475413719877e-10, |
| "loss": 0.6038, |
| "step": 931 |
| }, |
| { |
| "epoch": 2.9808102345415777, |
| "grad_norm": 0.4440151991197288, |
| "learning_rate": 5.568362403318706e-10, |
| "loss": 0.5722, |
| "step": 932 |
| }, |
| { |
| "epoch": 2.9840085287846483, |
| "grad_norm": 0.41546760769750524, |
| "learning_rate": 3.132229287666766e-10, |
| "loss": 0.6158, |
| "step": 933 |
| }, |
| { |
| "epoch": 2.9872068230277184, |
| "grad_norm": 0.47012397918132254, |
| "learning_rate": 1.3921099805302985e-10, |
| "loss": 0.612, |
| "step": 934 |
| }, |
| { |
| "epoch": 2.990405117270789, |
| "grad_norm": 0.4452241770705769, |
| "learning_rate": 3.480287063706289e-11, |
| "loss": 0.5822, |
| "step": 935 |
| }, |
| { |
| "epoch": 2.9936034115138592, |
| "grad_norm": 0.4503894897532014, |
| "learning_rate": 0.0, |
| "loss": 0.5513, |
| "step": 936 |
| }, |
| { |
| "epoch": 2.9936034115138592, |
| "step": 936, |
| "total_flos": 9.051650706620744e+17, |
| "train_loss": 0.0, |
| "train_runtime": 4.9645, |
| "train_samples_per_second": 18128.643, |
| "train_steps_per_second": 188.538 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 936, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 9.051650706620744e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |