| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 30.0, | |
| "eval_steps": 0, | |
| "global_step": 235740, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.06362942224484602, | |
| "grad_norm": 1662.35009765625, | |
| "learning_rate": 4.86e-07, | |
| "loss": 92.5416, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.12725884448969205, | |
| "grad_norm": 288.2401428222656, | |
| "learning_rate": 9.86e-07, | |
| "loss": 20.6659, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.19088826673453804, | |
| "grad_norm": 56.13795852661133, | |
| "learning_rate": 1.4860000000000003e-06, | |
| "loss": 14.7631, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.2545176889793841, | |
| "grad_norm": 102.28019714355469, | |
| "learning_rate": 1.986e-06, | |
| "loss": 14.3025, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.31814711122423006, | |
| "grad_norm": 155.62403869628906, | |
| "learning_rate": 2.486e-06, | |
| "loss": 13.5257, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.3817765334690761, | |
| "grad_norm": 210.75811767578125, | |
| "learning_rate": 2.986e-06, | |
| "loss": 12.8666, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.4454059557139221, | |
| "grad_norm": 256.039306640625, | |
| "learning_rate": 3.4860000000000006e-06, | |
| "loss": 12.397, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.5090353779587682, | |
| "grad_norm": 227.79017639160156, | |
| "learning_rate": 3.9860000000000005e-06, | |
| "loss": 12.2718, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.5726648002036141, | |
| "grad_norm": 307.928955078125, | |
| "learning_rate": 4.486000000000001e-06, | |
| "loss": 11.539, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.6362942224484601, | |
| "grad_norm": 199.85580444335938, | |
| "learning_rate": 4.986e-06, | |
| "loss": 11.1145, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.6999236446933061, | |
| "grad_norm": 236.899169921875, | |
| "learning_rate": 5.4860000000000005e-06, | |
| "loss": 11.1232, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.7635530669381522, | |
| "grad_norm": 265.123046875, | |
| "learning_rate": 5.986000000000001e-06, | |
| "loss": 10.6021, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.8271824891829982, | |
| "grad_norm": 254.1043701171875, | |
| "learning_rate": 6.486e-06, | |
| "loss": 10.4115, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.8908119114278442, | |
| "grad_norm": 172.3489990234375, | |
| "learning_rate": 6.9860000000000005e-06, | |
| "loss": 10.4529, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.9544413336726902, | |
| "grad_norm": 374.72003173828125, | |
| "learning_rate": 7.486000000000001e-06, | |
| "loss": 10.1329, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.0180707559175364, | |
| "grad_norm": 320.3682556152344, | |
| "learning_rate": 7.985e-06, | |
| "loss": 10.1367, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.0817001781623823, | |
| "grad_norm": 297.0594787597656, | |
| "learning_rate": 8.485000000000001e-06, | |
| "loss": 9.5914, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.1453296004072282, | |
| "grad_norm": 266.2686767578125, | |
| "learning_rate": 8.985000000000001e-06, | |
| "loss": 9.2799, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.2089590226520743, | |
| "grad_norm": 168.0514373779297, | |
| "learning_rate": 9.485000000000002e-06, | |
| "loss": 9.266, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.2725884448969205, | |
| "grad_norm": 213.7965545654297, | |
| "learning_rate": 9.985000000000002e-06, | |
| "loss": 9.1661, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.3362178671417664, | |
| "grad_norm": 189.05682373046875, | |
| "learning_rate": 9.978515105874015e-06, | |
| "loss": 8.954, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 1.3998472893866123, | |
| "grad_norm": 230.05084228515625, | |
| "learning_rate": 9.956365730486402e-06, | |
| "loss": 8.9562, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.4634767116314584, | |
| "grad_norm": 314.4221496582031, | |
| "learning_rate": 9.934304952600337e-06, | |
| "loss": 9.4717, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 1.5271061338763046, | |
| "grad_norm": 190.9048614501953, | |
| "learning_rate": 9.912155577212723e-06, | |
| "loss": 8.6758, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.5907355561211505, | |
| "grad_norm": 3140.1875, | |
| "learning_rate": 9.89000620182511e-06, | |
| "loss": 8.87, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 1.6543649783659964, | |
| "grad_norm": 396.64117431640625, | |
| "learning_rate": 9.867856826437496e-06, | |
| "loss": 8.5826, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.7179944006108423, | |
| "grad_norm": 171.70077514648438, | |
| "learning_rate": 9.845707451049881e-06, | |
| "loss": 8.4827, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 1.7816238228556884, | |
| "grad_norm": 269.8551940917969, | |
| "learning_rate": 9.823558075662267e-06, | |
| "loss": 8.5306, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.8452532451005346, | |
| "grad_norm": 255.013671875, | |
| "learning_rate": 9.801408700274653e-06, | |
| "loss": 8.182, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 1.9088826673453805, | |
| "grad_norm": 194.22486877441406, | |
| "learning_rate": 9.77925932488704e-06, | |
| "loss": 8.3592, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.9725120895902264, | |
| "grad_norm": 149.85800170898438, | |
| "learning_rate": 9.757109949499426e-06, | |
| "loss": 8.3879, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 2.0361415118350727, | |
| "grad_norm": 156.6005401611328, | |
| "learning_rate": 9.735004872862585e-06, | |
| "loss": 7.4399, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 2.0997709340799187, | |
| "grad_norm": 286.58648681640625, | |
| "learning_rate": 9.712855497474972e-06, | |
| "loss": 7.0406, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 2.1634003563247646, | |
| "grad_norm": 242.3479461669922, | |
| "learning_rate": 9.690706122087358e-06, | |
| "loss": 6.89, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 2.2270297785696105, | |
| "grad_norm": 180.5225372314453, | |
| "learning_rate": 9.668556746699744e-06, | |
| "loss": 6.8651, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 2.2906592008144564, | |
| "grad_norm": 223.84552001953125, | |
| "learning_rate": 9.64640737131213e-06, | |
| "loss": 6.8461, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 2.3542886230593028, | |
| "grad_norm": 233.3303680419922, | |
| "learning_rate": 9.624257995924515e-06, | |
| "loss": 6.7663, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 2.4179180453041487, | |
| "grad_norm": 237.0810546875, | |
| "learning_rate": 9.602108620536902e-06, | |
| "loss": 6.9313, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 2.4815474675489946, | |
| "grad_norm": 176.5728302001953, | |
| "learning_rate": 9.579959245149288e-06, | |
| "loss": 6.9688, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 2.545176889793841, | |
| "grad_norm": 184.43077087402344, | |
| "learning_rate": 9.557809869761674e-06, | |
| "loss": 6.7821, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 2.608806312038687, | |
| "grad_norm": 182.1748809814453, | |
| "learning_rate": 9.535660494374059e-06, | |
| "loss": 6.9468, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 2.6724357342835328, | |
| "grad_norm": 232.06759643554688, | |
| "learning_rate": 9.51355541773722e-06, | |
| "loss": 6.731, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 2.7360651565283787, | |
| "grad_norm": 169.12734985351562, | |
| "learning_rate": 9.491406042349606e-06, | |
| "loss": 6.649, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 2.7996945787732246, | |
| "grad_norm": 153.9056854248047, | |
| "learning_rate": 9.469256666961992e-06, | |
| "loss": 6.7055, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 2.8633240010180705, | |
| "grad_norm": 252.30517578125, | |
| "learning_rate": 9.447107291574379e-06, | |
| "loss": 6.7744, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 2.926953423262917, | |
| "grad_norm": 182.51229858398438, | |
| "learning_rate": 9.424957916186765e-06, | |
| "loss": 6.9481, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 2.9905828455077628, | |
| "grad_norm": 213.7582244873047, | |
| "learning_rate": 9.40280854079915e-06, | |
| "loss": 6.5967, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 3.0542122677526087, | |
| "grad_norm": 187.1132049560547, | |
| "learning_rate": 9.380659165411536e-06, | |
| "loss": 5.7351, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 3.117841689997455, | |
| "grad_norm": 157.81378173828125, | |
| "learning_rate": 9.358509790023921e-06, | |
| "loss": 5.4125, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 3.181471112242301, | |
| "grad_norm": 448.2672424316406, | |
| "learning_rate": 9.336360414636309e-06, | |
| "loss": 5.4095, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 3.245100534487147, | |
| "grad_norm": 170.9069061279297, | |
| "learning_rate": 9.314211039248694e-06, | |
| "loss": 5.4253, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 3.3087299567319928, | |
| "grad_norm": 186.37034606933594, | |
| "learning_rate": 9.29206166386108e-06, | |
| "loss": 5.3774, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 3.3723593789768387, | |
| "grad_norm": 134.44960021972656, | |
| "learning_rate": 9.269912288473466e-06, | |
| "loss": 5.5277, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 3.435988801221685, | |
| "grad_norm": 268.1274108886719, | |
| "learning_rate": 9.247807211836627e-06, | |
| "loss": 5.4516, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 3.499618223466531, | |
| "grad_norm": 248.0684814453125, | |
| "learning_rate": 9.225657836449013e-06, | |
| "loss": 5.322, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 3.563247645711377, | |
| "grad_norm": 214.72317504882812, | |
| "learning_rate": 9.203508461061398e-06, | |
| "loss": 5.5531, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 3.626877067956223, | |
| "grad_norm": 153.9894256591797, | |
| "learning_rate": 9.181359085673784e-06, | |
| "loss": 5.5238, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 3.690506490201069, | |
| "grad_norm": 174.88331604003906, | |
| "learning_rate": 9.159209710286171e-06, | |
| "loss": 5.5992, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 3.754135912445915, | |
| "grad_norm": 301.410888671875, | |
| "learning_rate": 9.137104633649332e-06, | |
| "loss": 5.5351, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 3.817765334690761, | |
| "grad_norm": 201.53282165527344, | |
| "learning_rate": 9.114955258261718e-06, | |
| "loss": 5.3985, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 3.881394756935607, | |
| "grad_norm": 212.6214141845703, | |
| "learning_rate": 9.092805882874104e-06, | |
| "loss": 5.4313, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 3.945024179180453, | |
| "grad_norm": 177.44863891601562, | |
| "learning_rate": 9.07065650748649e-06, | |
| "loss": 5.4173, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 4.008653601425299, | |
| "grad_norm": 160.0504150390625, | |
| "learning_rate": 9.04855143084965e-06, | |
| "loss": 5.2333, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 4.0722830236701455, | |
| "grad_norm": 150.31857299804688, | |
| "learning_rate": 9.026446354212812e-06, | |
| "loss": 4.3352, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 4.135912445914991, | |
| "grad_norm": 124.97169494628906, | |
| "learning_rate": 9.004296978825197e-06, | |
| "loss": 4.3442, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 4.199541868159837, | |
| "grad_norm": 215.25157165527344, | |
| "learning_rate": 8.982147603437585e-06, | |
| "loss": 4.3288, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 4.263171290404683, | |
| "grad_norm": 148.4134521484375, | |
| "learning_rate": 8.95999822804997e-06, | |
| "loss": 4.367, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 4.326800712649529, | |
| "grad_norm": 204.40850830078125, | |
| "learning_rate": 8.93789315141313e-06, | |
| "loss": 4.4607, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 4.390430134894375, | |
| "grad_norm": 164.64273071289062, | |
| "learning_rate": 8.915743776025517e-06, | |
| "loss": 4.4461, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 4.454059557139221, | |
| "grad_norm": 204.80953979492188, | |
| "learning_rate": 8.893594400637903e-06, | |
| "loss": 4.6218, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 4.517688979384067, | |
| "grad_norm": 185.70278930664062, | |
| "learning_rate": 8.871445025250289e-06, | |
| "loss": 4.4249, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 4.581318401628913, | |
| "grad_norm": 202.91989135742188, | |
| "learning_rate": 8.849295649862674e-06, | |
| "loss": 4.4129, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 4.64494782387376, | |
| "grad_norm": 164.02198791503906, | |
| "learning_rate": 8.82714627447506e-06, | |
| "loss": 4.4065, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 4.7085772461186055, | |
| "grad_norm": 155.7901153564453, | |
| "learning_rate": 8.804996899087447e-06, | |
| "loss": 4.5452, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 4.772206668363451, | |
| "grad_norm": 194.26280212402344, | |
| "learning_rate": 8.782847523699833e-06, | |
| "loss": 4.5411, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 4.835836090608297, | |
| "grad_norm": 168.18798828125, | |
| "learning_rate": 8.760698148312218e-06, | |
| "loss": 4.5423, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 4.899465512853143, | |
| "grad_norm": 136.41905212402344, | |
| "learning_rate": 8.738548772924604e-06, | |
| "loss": 4.4942, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 4.963094935097989, | |
| "grad_norm": 141.8522491455078, | |
| "learning_rate": 8.71639939753699e-06, | |
| "loss": 4.5332, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 5.026724357342835, | |
| "grad_norm": 149.42271423339844, | |
| "learning_rate": 8.694250022149377e-06, | |
| "loss": 4.0759, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 5.090353779587681, | |
| "grad_norm": 139.2994842529297, | |
| "learning_rate": 8.672100646761763e-06, | |
| "loss": 3.6274, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 5.153983201832528, | |
| "grad_norm": 140.65269470214844, | |
| "learning_rate": 8.649951271374148e-06, | |
| "loss": 3.6795, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 5.217612624077374, | |
| "grad_norm": 139.22752380371094, | |
| "learning_rate": 8.627801895986534e-06, | |
| "loss": 3.6741, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 5.28124204632222, | |
| "grad_norm": 93.71381378173828, | |
| "learning_rate": 8.60565252059892e-06, | |
| "loss": 3.7396, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 5.3448714685670655, | |
| "grad_norm": 118.81936645507812, | |
| "learning_rate": 8.583503145211307e-06, | |
| "loss": 3.6839, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 5.408500890811911, | |
| "grad_norm": 143.53829956054688, | |
| "learning_rate": 8.561353769823692e-06, | |
| "loss": 3.732, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 5.472130313056757, | |
| "grad_norm": 152.01527404785156, | |
| "learning_rate": 8.539248693186852e-06, | |
| "loss": 3.6557, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 5.535759735301603, | |
| "grad_norm": 159.16392517089844, | |
| "learning_rate": 8.517143616550015e-06, | |
| "loss": 3.6925, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 5.599389157546449, | |
| "grad_norm": 143.2123260498047, | |
| "learning_rate": 8.4949942411624e-06, | |
| "loss": 3.7149, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 5.663018579791295, | |
| "grad_norm": 136.5101318359375, | |
| "learning_rate": 8.472844865774786e-06, | |
| "loss": 3.6744, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 5.726648002036142, | |
| "grad_norm": 156.95541381835938, | |
| "learning_rate": 8.450695490387172e-06, | |
| "loss": 3.7669, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 5.790277424280988, | |
| "grad_norm": 137.13330078125, | |
| "learning_rate": 8.428546114999557e-06, | |
| "loss": 3.651, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 5.853906846525834, | |
| "grad_norm": 149.19625854492188, | |
| "learning_rate": 8.406396739611945e-06, | |
| "loss": 3.721, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 5.91753626877068, | |
| "grad_norm": 193.83432006835938, | |
| "learning_rate": 8.384291662975104e-06, | |
| "loss": 3.7012, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 5.9811656910155255, | |
| "grad_norm": 149.3867950439453, | |
| "learning_rate": 8.362186586338266e-06, | |
| "loss": 3.7294, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 6.0447951132603714, | |
| "grad_norm": 144.5869140625, | |
| "learning_rate": 8.340037210950653e-06, | |
| "loss": 3.2432, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 6.108424535505217, | |
| "grad_norm": 138.15234375, | |
| "learning_rate": 8.317887835563039e-06, | |
| "loss": 3.0295, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 6.172053957750063, | |
| "grad_norm": 544.6531372070312, | |
| "learning_rate": 8.295738460175424e-06, | |
| "loss": 3.0364, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 6.23568337999491, | |
| "grad_norm": 124.35468292236328, | |
| "learning_rate": 8.273633383538585e-06, | |
| "loss": 3.0687, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 6.299312802239756, | |
| "grad_norm": 93.38568878173828, | |
| "learning_rate": 8.251484008150971e-06, | |
| "loss": 3.064, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 6.362942224484602, | |
| "grad_norm": 192.03231811523438, | |
| "learning_rate": 8.229334632763357e-06, | |
| "loss": 3.112, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 6.426571646729448, | |
| "grad_norm": 107.92765808105469, | |
| "learning_rate": 8.207185257375742e-06, | |
| "loss": 3.1438, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 6.490201068974294, | |
| "grad_norm": 124.23885345458984, | |
| "learning_rate": 8.185080180738904e-06, | |
| "loss": 3.0733, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 6.55383049121914, | |
| "grad_norm": 154.87612915039062, | |
| "learning_rate": 8.162930805351291e-06, | |
| "loss": 3.1719, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 6.6174599134639855, | |
| "grad_norm": 134.2186737060547, | |
| "learning_rate": 8.140781429963675e-06, | |
| "loss": 3.1355, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 6.6810893357088315, | |
| "grad_norm": 173.08433532714844, | |
| "learning_rate": 8.11863205457606e-06, | |
| "loss": 3.1612, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 6.744718757953677, | |
| "grad_norm": 179.25296020507812, | |
| "learning_rate": 8.096482679188448e-06, | |
| "loss": 3.1938, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 6.808348180198524, | |
| "grad_norm": 138.08518981933594, | |
| "learning_rate": 8.074333303800833e-06, | |
| "loss": 3.1375, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 6.87197760244337, | |
| "grad_norm": 106.96342468261719, | |
| "learning_rate": 8.052183928413219e-06, | |
| "loss": 3.1969, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 6.935607024688216, | |
| "grad_norm": 127.7270278930664, | |
| "learning_rate": 8.030034553025605e-06, | |
| "loss": 3.2214, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 6.999236446933062, | |
| "grad_norm": 151.88905334472656, | |
| "learning_rate": 8.007885177637992e-06, | |
| "loss": 3.1364, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 7.062865869177908, | |
| "grad_norm": 146.13461303710938, | |
| "learning_rate": 7.985735802250378e-06, | |
| "loss": 2.63, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 7.126495291422754, | |
| "grad_norm": 158.6125030517578, | |
| "learning_rate": 7.963586426862763e-06, | |
| "loss": 2.5451, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 7.1901247136676, | |
| "grad_norm": 136.17828369140625, | |
| "learning_rate": 7.941481350225924e-06, | |
| "loss": 2.644, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 7.2537541359124456, | |
| "grad_norm": 183.11447143554688, | |
| "learning_rate": 7.91933197483831e-06, | |
| "loss": 2.6482, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 7.317383558157292, | |
| "grad_norm": 125.30079650878906, | |
| "learning_rate": 7.897182599450696e-06, | |
| "loss": 2.6017, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 7.381012980402138, | |
| "grad_norm": 104.10094451904297, | |
| "learning_rate": 7.875033224063083e-06, | |
| "loss": 2.6626, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 7.444642402646984, | |
| "grad_norm": 153.14060974121094, | |
| "learning_rate": 7.852883848675467e-06, | |
| "loss": 2.6698, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 7.50827182489183, | |
| "grad_norm": 80.38119506835938, | |
| "learning_rate": 7.830734473287854e-06, | |
| "loss": 2.6595, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 7.571901247136676, | |
| "grad_norm": 139.31524658203125, | |
| "learning_rate": 7.80858509790024e-06, | |
| "loss": 2.6683, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 7.635530669381522, | |
| "grad_norm": 135.78240966796875, | |
| "learning_rate": 7.786480021263401e-06, | |
| "loss": 2.7187, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 7.699160091626368, | |
| "grad_norm": 109.59832000732422, | |
| "learning_rate": 7.764330645875787e-06, | |
| "loss": 2.6213, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 7.762789513871214, | |
| "grad_norm": 143.305908203125, | |
| "learning_rate": 7.742181270488172e-06, | |
| "loss": 2.7119, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 7.82641893611606, | |
| "grad_norm": 147.27064514160156, | |
| "learning_rate": 7.72003189510056e-06, | |
| "loss": 2.739, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 7.8900483583609065, | |
| "grad_norm": 109.4032211303711, | |
| "learning_rate": 7.697882519712945e-06, | |
| "loss": 2.686, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 7.953677780605752, | |
| "grad_norm": 111.08818054199219, | |
| "learning_rate": 7.675733144325331e-06, | |
| "loss": 2.7295, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 8.017307202850597, | |
| "grad_norm": 80.8994369506836, | |
| "learning_rate": 7.653583768937717e-06, | |
| "loss": 2.6062, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 8.080936625095443, | |
| "grad_norm": 132.42283630371094, | |
| "learning_rate": 7.631434393550102e-06, | |
| "loss": 2.2272, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 8.144566047340291, | |
| "grad_norm": 105.58837127685547, | |
| "learning_rate": 7.6093293169132635e-06, | |
| "loss": 2.2692, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 8.208195469585137, | |
| "grad_norm": 165.8797149658203, | |
| "learning_rate": 7.58717994152565e-06, | |
| "loss": 2.3135, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 8.271824891829983, | |
| "grad_norm": 103.73261260986328, | |
| "learning_rate": 7.5650305661380356e-06, | |
| "loss": 2.2546, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 8.335454314074829, | |
| "grad_norm": 100.5468521118164, | |
| "learning_rate": 7.542881190750422e-06, | |
| "loss": 2.2882, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 8.399083736319675, | |
| "grad_norm": 124.30194854736328, | |
| "learning_rate": 7.520731815362808e-06, | |
| "loss": 2.2749, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 8.46271315856452, | |
| "grad_norm": 124.07736206054688, | |
| "learning_rate": 7.498582439975194e-06, | |
| "loss": 2.363, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 8.526342580809366, | |
| "grad_norm": 110.9386978149414, | |
| "learning_rate": 7.47643306458758e-06, | |
| "loss": 2.2923, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 8.589972003054212, | |
| "grad_norm": 129.3117218017578, | |
| "learning_rate": 7.4542836891999645e-06, | |
| "loss": 2.3275, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 8.653601425299058, | |
| "grad_norm": 111.8931884765625, | |
| "learning_rate": 7.432134313812351e-06, | |
| "loss": 2.3738, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 8.717230847543904, | |
| "grad_norm": 118.7526626586914, | |
| "learning_rate": 7.409984938424737e-06, | |
| "loss": 2.3416, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 8.78086026978875, | |
| "grad_norm": 149.440673828125, | |
| "learning_rate": 7.387835563037123e-06, | |
| "loss": 2.3851, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 8.844489692033596, | |
| "grad_norm": 122.81755828857422, | |
| "learning_rate": 7.365730486400284e-06, | |
| "loss": 2.3356, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 8.908119114278442, | |
| "grad_norm": 132.1360626220703, | |
| "learning_rate": 7.34358111101267e-06, | |
| "loss": 2.3598, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 8.971748536523288, | |
| "grad_norm": 125.38104248046875, | |
| "learning_rate": 7.3214317356250565e-06, | |
| "loss": 2.4272, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 9.035377958768134, | |
| "grad_norm": 94.84292602539062, | |
| "learning_rate": 7.299326658988217e-06, | |
| "loss": 2.141, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 9.09900738101298, | |
| "grad_norm": 108.36376190185547, | |
| "learning_rate": 7.2771772836006025e-06, | |
| "loss": 2.001, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 9.162636803257826, | |
| "grad_norm": 120.51274108886719, | |
| "learning_rate": 7.255027908212989e-06, | |
| "loss": 2.014, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 9.226266225502673, | |
| "grad_norm": 76.73661041259766, | |
| "learning_rate": 7.232878532825375e-06, | |
| "loss": 1.9826, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 9.28989564774752, | |
| "grad_norm": 93.48287200927734, | |
| "learning_rate": 7.210729157437761e-06, | |
| "loss": 1.995, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 9.353525069992365, | |
| "grad_norm": 87.56092071533203, | |
| "learning_rate": 7.188579782050147e-06, | |
| "loss": 2.0097, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 9.417154492237211, | |
| "grad_norm": 128.68373107910156, | |
| "learning_rate": 7.166430406662532e-06, | |
| "loss": 2.0412, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 9.480783914482057, | |
| "grad_norm": 101.52668762207031, | |
| "learning_rate": 7.144281031274919e-06, | |
| "loss": 2.0144, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 9.544413336726903, | |
| "grad_norm": 90.50218963623047, | |
| "learning_rate": 7.12217595463808e-06, | |
| "loss": 2.0653, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 9.608042758971749, | |
| "grad_norm": 113.8707046508789, | |
| "learning_rate": 7.100026579250465e-06, | |
| "loss": 2.022, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 9.671672181216595, | |
| "grad_norm": 78.54847717285156, | |
| "learning_rate": 7.077921502613627e-06, | |
| "loss": 2.0327, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 9.73530160346144, | |
| "grad_norm": 131.4427947998047, | |
| "learning_rate": 7.055772127226013e-06, | |
| "loss": 2.0596, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 9.798931025706286, | |
| "grad_norm": 120.61900329589844, | |
| "learning_rate": 7.033667050589174e-06, | |
| "loss": 2.0761, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 9.862560447951132, | |
| "grad_norm": 84.29814147949219, | |
| "learning_rate": 7.01151767520156e-06, | |
| "loss": 2.1245, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 9.926189870195978, | |
| "grad_norm": 91.78532409667969, | |
| "learning_rate": 6.989368299813946e-06, | |
| "loss": 2.1062, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 9.989819292440824, | |
| "grad_norm": 111.85667419433594, | |
| "learning_rate": 6.9672189244263324e-06, | |
| "loss": 2.1186, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 10.05344871468567, | |
| "grad_norm": 97.98519897460938, | |
| "learning_rate": 6.945113847789493e-06, | |
| "loss": 1.8283, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 10.117078136930516, | |
| "grad_norm": 80.28434753417969, | |
| "learning_rate": 6.9229644724018785e-06, | |
| "loss": 1.7627, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 10.180707559175362, | |
| "grad_norm": 99.89539337158203, | |
| "learning_rate": 6.900859395765041e-06, | |
| "loss": 1.7775, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 10.244336981420208, | |
| "grad_norm": 87.49510955810547, | |
| "learning_rate": 6.878710020377426e-06, | |
| "loss": 1.7865, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 10.307966403665056, | |
| "grad_norm": 87.29383850097656, | |
| "learning_rate": 6.856560644989811e-06, | |
| "loss": 1.8018, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 10.371595825909901, | |
| "grad_norm": 88.82074737548828, | |
| "learning_rate": 6.834411269602198e-06, | |
| "loss": 1.7851, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 10.435225248154747, | |
| "grad_norm": 90.42290496826172, | |
| "learning_rate": 6.812261894214583e-06, | |
| "loss": 1.8085, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 10.498854670399593, | |
| "grad_norm": 85.89569091796875, | |
| "learning_rate": 6.7901125188269704e-06, | |
| "loss": 1.8293, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 10.56248409264444, | |
| "grad_norm": 89.20499420166016, | |
| "learning_rate": 6.767963143439355e-06, | |
| "loss": 1.8549, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 10.626113514889285, | |
| "grad_norm": 193.05775451660156, | |
| "learning_rate": 6.745813768051741e-06, | |
| "loss": 1.8531, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 10.689742937134131, | |
| "grad_norm": 106.58789825439453, | |
| "learning_rate": 6.723664392664127e-06, | |
| "loss": 1.8538, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 10.753372359378977, | |
| "grad_norm": 136.8468780517578, | |
| "learning_rate": 6.701515017276513e-06, | |
| "loss": 1.8814, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 10.817001781623823, | |
| "grad_norm": 128.12271118164062, | |
| "learning_rate": 6.679365641888899e-06, | |
| "loss": 1.8576, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 10.880631203868669, | |
| "grad_norm": 70.90370178222656, | |
| "learning_rate": 6.657216266501285e-06, | |
| "loss": 1.8516, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 10.944260626113515, | |
| "grad_norm": 77.27445220947266, | |
| "learning_rate": 6.635066891113671e-06, | |
| "loss": 1.8555, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 11.00789004835836, | |
| "grad_norm": 108.38621520996094, | |
| "learning_rate": 6.612917515726057e-06, | |
| "loss": 1.8631, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 11.071519470603207, | |
| "grad_norm": 145.12940979003906, | |
| "learning_rate": 6.590768140338443e-06, | |
| "loss": 1.6189, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 11.135148892848052, | |
| "grad_norm": 115.5062484741211, | |
| "learning_rate": 6.568618764950829e-06, | |
| "loss": 1.6143, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 11.198778315092898, | |
| "grad_norm": 71.71438598632812, | |
| "learning_rate": 6.546469389563215e-06, | |
| "loss": 1.6246, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 11.262407737337744, | |
| "grad_norm": 89.9764633178711, | |
| "learning_rate": 6.5243200141756004e-06, | |
| "loss": 1.5997, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 11.32603715958259, | |
| "grad_norm": 80.51982879638672, | |
| "learning_rate": 6.502170638787987e-06, | |
| "loss": 1.646, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 11.389666581827438, | |
| "grad_norm": 87.14283752441406, | |
| "learning_rate": 6.4800212634003725e-06, | |
| "loss": 1.6323, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 11.453296004072284, | |
| "grad_norm": 76.05656433105469, | |
| "learning_rate": 6.457871888012759e-06, | |
| "loss": 1.6623, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 11.51692542631713, | |
| "grad_norm": 84.08787536621094, | |
| "learning_rate": 6.435722512625145e-06, | |
| "loss": 1.6544, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 11.580554848561976, | |
| "grad_norm": 113.19395446777344, | |
| "learning_rate": 6.413573137237531e-06, | |
| "loss": 1.6671, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 11.644184270806822, | |
| "grad_norm": 92.68965911865234, | |
| "learning_rate": 6.391423761849917e-06, | |
| "loss": 1.6742, | |
| "step": 91500 | |
| }, | |
| { | |
| "epoch": 11.707813693051667, | |
| "grad_norm": 116.95278930664062, | |
| "learning_rate": 6.369274386462302e-06, | |
| "loss": 1.6409, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 11.771443115296513, | |
| "grad_norm": 77.6058120727539, | |
| "learning_rate": 6.347213608576238e-06, | |
| "loss": 1.6504, | |
| "step": 92500 | |
| }, | |
| { | |
| "epoch": 11.83507253754136, | |
| "grad_norm": 74.96102142333984, | |
| "learning_rate": 6.3251085319394e-06, | |
| "loss": 1.6791, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 11.898701959786205, | |
| "grad_norm": 95.83757781982422, | |
| "learning_rate": 6.302959156551785e-06, | |
| "loss": 1.6923, | |
| "step": 93500 | |
| }, | |
| { | |
| "epoch": 11.962331382031051, | |
| "grad_norm": 114.55757141113281, | |
| "learning_rate": 6.280809781164172e-06, | |
| "loss": 1.697, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 12.025960804275897, | |
| "grad_norm": 59.73118591308594, | |
| "learning_rate": 6.258660405776557e-06, | |
| "loss": 1.6136, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 12.089590226520743, | |
| "grad_norm": 86.23199462890625, | |
| "learning_rate": 6.236511030388943e-06, | |
| "loss": 1.4437, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 12.153219648765589, | |
| "grad_norm": 71.51868438720703, | |
| "learning_rate": 6.2143616550013295e-06, | |
| "loss": 1.49, | |
| "step": 95500 | |
| }, | |
| { | |
| "epoch": 12.216849071010435, | |
| "grad_norm": 96.19779205322266, | |
| "learning_rate": 6.192212279613715e-06, | |
| "loss": 1.4567, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 12.28047849325528, | |
| "grad_norm": 79.43608093261719, | |
| "learning_rate": 6.170062904226102e-06, | |
| "loss": 1.5007, | |
| "step": 96500 | |
| }, | |
| { | |
| "epoch": 12.344107915500127, | |
| "grad_norm": 79.2935791015625, | |
| "learning_rate": 6.147913528838487e-06, | |
| "loss": 1.4826, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 12.407737337744972, | |
| "grad_norm": 144.53054809570312, | |
| "learning_rate": 6.125764153450873e-06, | |
| "loss": 1.4668, | |
| "step": 97500 | |
| }, | |
| { | |
| "epoch": 12.47136675998982, | |
| "grad_norm": 105.31471252441406, | |
| "learning_rate": 6.103659076814035e-06, | |
| "loss": 1.5009, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 12.534996182234666, | |
| "grad_norm": 79.45948028564453, | |
| "learning_rate": 6.08150970142642e-06, | |
| "loss": 1.5008, | |
| "step": 98500 | |
| }, | |
| { | |
| "epoch": 12.598625604479512, | |
| "grad_norm": 100.81867218017578, | |
| "learning_rate": 6.059360326038807e-06, | |
| "loss": 1.5336, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 12.662255026724358, | |
| "grad_norm": 94.66363525390625, | |
| "learning_rate": 6.037210950651192e-06, | |
| "loss": 1.5057, | |
| "step": 99500 | |
| }, | |
| { | |
| "epoch": 12.725884448969204, | |
| "grad_norm": 73.030517578125, | |
| "learning_rate": 6.0150615752635775e-06, | |
| "loss": 1.5081, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 12.78951387121405, | |
| "grad_norm": 67.0549545288086, | |
| "learning_rate": 5.99295649862674e-06, | |
| "loss": 1.5402, | |
| "step": 100500 | |
| }, | |
| { | |
| "epoch": 12.853143293458896, | |
| "grad_norm": 91.37773895263672, | |
| "learning_rate": 5.970807123239125e-06, | |
| "loss": 1.5519, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 12.916772715703742, | |
| "grad_norm": 87.36595153808594, | |
| "learning_rate": 5.948657747851511e-06, | |
| "loss": 1.5171, | |
| "step": 101500 | |
| }, | |
| { | |
| "epoch": 12.980402137948587, | |
| "grad_norm": 82.45221710205078, | |
| "learning_rate": 5.926508372463897e-06, | |
| "loss": 1.5249, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 13.044031560193433, | |
| "grad_norm": 67.87359619140625, | |
| "learning_rate": 5.904358997076283e-06, | |
| "loss": 1.4117, | |
| "step": 102500 | |
| }, | |
| { | |
| "epoch": 13.10766098243828, | |
| "grad_norm": 77.75003814697266, | |
| "learning_rate": 5.882209621688669e-06, | |
| "loss": 1.3524, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 13.171290404683125, | |
| "grad_norm": 103.19142150878906, | |
| "learning_rate": 5.860060246301055e-06, | |
| "loss": 1.3564, | |
| "step": 103500 | |
| }, | |
| { | |
| "epoch": 13.234919826927971, | |
| "grad_norm": 82.8349380493164, | |
| "learning_rate": 5.837999468414991e-06, | |
| "loss": 1.3483, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 13.298549249172817, | |
| "grad_norm": 83.94813537597656, | |
| "learning_rate": 5.815850093027378e-06, | |
| "loss": 1.386, | |
| "step": 104500 | |
| }, | |
| { | |
| "epoch": 13.362178671417663, | |
| "grad_norm": 80.00110626220703, | |
| "learning_rate": 5.793700717639763e-06, | |
| "loss": 1.3723, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 13.425808093662509, | |
| "grad_norm": 79.54706573486328, | |
| "learning_rate": 5.771551342252149e-06, | |
| "loss": 1.3933, | |
| "step": 105500 | |
| }, | |
| { | |
| "epoch": 13.489437515907355, | |
| "grad_norm": 118.33966827392578, | |
| "learning_rate": 5.749401966864535e-06, | |
| "loss": 1.3672, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 13.553066938152202, | |
| "grad_norm": 148.68141174316406, | |
| "learning_rate": 5.727252591476921e-06, | |
| "loss": 1.3796, | |
| "step": 106500 | |
| }, | |
| { | |
| "epoch": 13.616696360397048, | |
| "grad_norm": 81.23079681396484, | |
| "learning_rate": 5.705103216089307e-06, | |
| "loss": 1.3637, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 13.680325782641894, | |
| "grad_norm": 118.37026977539062, | |
| "learning_rate": 5.682953840701693e-06, | |
| "loss": 1.4061, | |
| "step": 107500 | |
| }, | |
| { | |
| "epoch": 13.74395520488674, | |
| "grad_norm": 87.67139434814453, | |
| "learning_rate": 5.660804465314078e-06, | |
| "loss": 1.3897, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 13.807584627131586, | |
| "grad_norm": 76.84065246582031, | |
| "learning_rate": 5.638655089926465e-06, | |
| "loss": 1.4342, | |
| "step": 108500 | |
| }, | |
| { | |
| "epoch": 13.871214049376432, | |
| "grad_norm": 83.0779037475586, | |
| "learning_rate": 5.61650571453885e-06, | |
| "loss": 1.3821, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 13.934843471621278, | |
| "grad_norm": 63.323001861572266, | |
| "learning_rate": 5.594400637902012e-06, | |
| "loss": 1.411, | |
| "step": 109500 | |
| }, | |
| { | |
| "epoch": 13.998472893866124, | |
| "grad_norm": 75.757080078125, | |
| "learning_rate": 5.572295561265173e-06, | |
| "loss": 1.4214, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 14.06210231611097, | |
| "grad_norm": 47.76633071899414, | |
| "learning_rate": 5.550146185877559e-06, | |
| "loss": 1.2551, | |
| "step": 110500 | |
| }, | |
| { | |
| "epoch": 14.125731738355816, | |
| "grad_norm": 67.52932739257812, | |
| "learning_rate": 5.528041109240719e-06, | |
| "loss": 1.2366, | |
| "step": 111000 | |
| }, | |
| { | |
| "epoch": 14.189361160600662, | |
| "grad_norm": 77.91776275634766, | |
| "learning_rate": 5.505891733853106e-06, | |
| "loss": 1.2553, | |
| "step": 111500 | |
| }, | |
| { | |
| "epoch": 14.252990582845507, | |
| "grad_norm": 74.56119537353516, | |
| "learning_rate": 5.4837423584654914e-06, | |
| "loss": 1.2553, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 14.316620005090353, | |
| "grad_norm": 70.80554962158203, | |
| "learning_rate": 5.461592983077878e-06, | |
| "loss": 1.2624, | |
| "step": 112500 | |
| }, | |
| { | |
| "epoch": 14.3802494273352, | |
| "grad_norm": 72.7087631225586, | |
| "learning_rate": 5.4394436076902635e-06, | |
| "loss": 1.2771, | |
| "step": 113000 | |
| }, | |
| { | |
| "epoch": 14.443878849580045, | |
| "grad_norm": 81.98471069335938, | |
| "learning_rate": 5.41729423230265e-06, | |
| "loss": 1.2744, | |
| "step": 113500 | |
| }, | |
| { | |
| "epoch": 14.507508271824891, | |
| "grad_norm": 71.72978973388672, | |
| "learning_rate": 5.395189155665811e-06, | |
| "loss": 1.2616, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 14.571137694069737, | |
| "grad_norm": 73.07415771484375, | |
| "learning_rate": 5.373039780278196e-06, | |
| "loss": 1.2744, | |
| "step": 114500 | |
| }, | |
| { | |
| "epoch": 14.634767116314585, | |
| "grad_norm": 46.78715133666992, | |
| "learning_rate": 5.350890404890583e-06, | |
| "loss": 1.2705, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 14.69839653855943, | |
| "grad_norm": 80.48126220703125, | |
| "learning_rate": 5.328741029502968e-06, | |
| "loss": 1.3005, | |
| "step": 115500 | |
| }, | |
| { | |
| "epoch": 14.762025960804277, | |
| "grad_norm": 78.11446380615234, | |
| "learning_rate": 5.306591654115354e-06, | |
| "loss": 1.3013, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 14.825655383049122, | |
| "grad_norm": 113.7435302734375, | |
| "learning_rate": 5.28444227872774e-06, | |
| "loss": 1.298, | |
| "step": 116500 | |
| }, | |
| { | |
| "epoch": 14.889284805293968, | |
| "grad_norm": 58.536346435546875, | |
| "learning_rate": 5.262292903340126e-06, | |
| "loss": 1.2972, | |
| "step": 117000 | |
| }, | |
| { | |
| "epoch": 14.952914227538814, | |
| "grad_norm": 85.87594604492188, | |
| "learning_rate": 5.240143527952512e-06, | |
| "loss": 1.277, | |
| "step": 117500 | |
| }, | |
| { | |
| "epoch": 15.01654364978366, | |
| "grad_norm": 61.39375305175781, | |
| "learning_rate": 5.217994152564898e-06, | |
| "loss": 1.2718, | |
| "step": 118000 | |
| }, | |
| { | |
| "epoch": 15.080173072028506, | |
| "grad_norm": 64.70631408691406, | |
| "learning_rate": 5.1958447771772836e-06, | |
| "loss": 1.1697, | |
| "step": 118500 | |
| }, | |
| { | |
| "epoch": 15.143802494273352, | |
| "grad_norm": 81.51799774169922, | |
| "learning_rate": 5.17369540178967e-06, | |
| "loss": 1.1819, | |
| "step": 119000 | |
| }, | |
| { | |
| "epoch": 15.207431916518198, | |
| "grad_norm": 81.38251495361328, | |
| "learning_rate": 5.151546026402056e-06, | |
| "loss": 1.1916, | |
| "step": 119500 | |
| }, | |
| { | |
| "epoch": 15.271061338763044, | |
| "grad_norm": 87.31340789794922, | |
| "learning_rate": 5.129396651014442e-06, | |
| "loss": 1.1829, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 15.33469076100789, | |
| "grad_norm": 67.25629425048828, | |
| "learning_rate": 5.107247275626828e-06, | |
| "loss": 1.1632, | |
| "step": 120500 | |
| }, | |
| { | |
| "epoch": 15.398320183252736, | |
| "grad_norm": 56.04712677001953, | |
| "learning_rate": 5.085097900239213e-06, | |
| "loss": 1.1809, | |
| "step": 121000 | |
| }, | |
| { | |
| "epoch": 15.461949605497582, | |
| "grad_norm": 66.33815002441406, | |
| "learning_rate": 5.0629928236023755e-06, | |
| "loss": 1.1913, | |
| "step": 121500 | |
| }, | |
| { | |
| "epoch": 15.525579027742427, | |
| "grad_norm": 69.98699951171875, | |
| "learning_rate": 5.04084344821476e-06, | |
| "loss": 1.1916, | |
| "step": 122000 | |
| }, | |
| { | |
| "epoch": 15.589208449987273, | |
| "grad_norm": 70.65410614013672, | |
| "learning_rate": 5.018694072827147e-06, | |
| "loss": 1.1969, | |
| "step": 122500 | |
| }, | |
| { | |
| "epoch": 15.65283787223212, | |
| "grad_norm": 68.66796875, | |
| "learning_rate": 4.996544697439532e-06, | |
| "loss": 1.1929, | |
| "step": 123000 | |
| }, | |
| { | |
| "epoch": 15.716467294476967, | |
| "grad_norm": 68.35984802246094, | |
| "learning_rate": 4.974439620802694e-06, | |
| "loss": 1.2086, | |
| "step": 123500 | |
| }, | |
| { | |
| "epoch": 15.780096716721813, | |
| "grad_norm": 64.63552856445312, | |
| "learning_rate": 4.952290245415079e-06, | |
| "loss": 1.1864, | |
| "step": 124000 | |
| }, | |
| { | |
| "epoch": 15.843726138966659, | |
| "grad_norm": 59.172645568847656, | |
| "learning_rate": 4.930140870027466e-06, | |
| "loss": 1.2068, | |
| "step": 124500 | |
| }, | |
| { | |
| "epoch": 15.907355561211505, | |
| "grad_norm": 64.2562255859375, | |
| "learning_rate": 4.907991494639851e-06, | |
| "loss": 1.2253, | |
| "step": 125000 | |
| }, | |
| { | |
| "epoch": 15.97098498345635, | |
| "grad_norm": 61.80392837524414, | |
| "learning_rate": 4.885842119252238e-06, | |
| "loss": 1.1963, | |
| "step": 125500 | |
| }, | |
| { | |
| "epoch": 16.034614405701195, | |
| "grad_norm": 86.79552459716797, | |
| "learning_rate": 4.8636927438646234e-06, | |
| "loss": 1.1585, | |
| "step": 126000 | |
| }, | |
| { | |
| "epoch": 16.098243827946042, | |
| "grad_norm": 81.79373931884766, | |
| "learning_rate": 4.841543368477009e-06, | |
| "loss": 1.0834, | |
| "step": 126500 | |
| }, | |
| { | |
| "epoch": 16.161873250190887, | |
| "grad_norm": 60.24835205078125, | |
| "learning_rate": 4.8193939930893955e-06, | |
| "loss": 1.0937, | |
| "step": 127000 | |
| }, | |
| { | |
| "epoch": 16.225502672435734, | |
| "grad_norm": 74.93160247802734, | |
| "learning_rate": 4.797244617701781e-06, | |
| "loss": 1.0995, | |
| "step": 127500 | |
| }, | |
| { | |
| "epoch": 16.289132094680582, | |
| "grad_norm": 75.08971405029297, | |
| "learning_rate": 4.775095242314168e-06, | |
| "loss": 1.0787, | |
| "step": 128000 | |
| }, | |
| { | |
| "epoch": 16.352761516925426, | |
| "grad_norm": 66.41687774658203, | |
| "learning_rate": 4.752990165677328e-06, | |
| "loss": 1.1217, | |
| "step": 128500 | |
| }, | |
| { | |
| "epoch": 16.416390939170274, | |
| "grad_norm": 68.62983703613281, | |
| "learning_rate": 4.730840790289714e-06, | |
| "loss": 1.1185, | |
| "step": 129000 | |
| }, | |
| { | |
| "epoch": 16.480020361415118, | |
| "grad_norm": 67.19387817382812, | |
| "learning_rate": 4.7086914149021e-06, | |
| "loss": 1.1203, | |
| "step": 129500 | |
| }, | |
| { | |
| "epoch": 16.543649783659966, | |
| "grad_norm": 84.18933868408203, | |
| "learning_rate": 4.686542039514486e-06, | |
| "loss": 1.1201, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 16.60727920590481, | |
| "grad_norm": 56.41159439086914, | |
| "learning_rate": 4.664392664126872e-06, | |
| "loss": 1.125, | |
| "step": 130500 | |
| }, | |
| { | |
| "epoch": 16.670908628149657, | |
| "grad_norm": 90.81446075439453, | |
| "learning_rate": 4.642376184991584e-06, | |
| "loss": 1.1214, | |
| "step": 131000 | |
| }, | |
| { | |
| "epoch": 16.7345380503945, | |
| "grad_norm": 66.80113220214844, | |
| "learning_rate": 4.62022680960397e-06, | |
| "loss": 1.1228, | |
| "step": 131500 | |
| }, | |
| { | |
| "epoch": 16.79816747263935, | |
| "grad_norm": 68.8161849975586, | |
| "learning_rate": 4.598077434216355e-06, | |
| "loss": 1.1381, | |
| "step": 132000 | |
| }, | |
| { | |
| "epoch": 16.861796894884193, | |
| "grad_norm": 80.11527252197266, | |
| "learning_rate": 4.575928058828742e-06, | |
| "loss": 1.1414, | |
| "step": 132500 | |
| }, | |
| { | |
| "epoch": 16.92542631712904, | |
| "grad_norm": 64.7822036743164, | |
| "learning_rate": 4.553778683441127e-06, | |
| "loss": 1.123, | |
| "step": 133000 | |
| }, | |
| { | |
| "epoch": 16.989055739373885, | |
| "grad_norm": 65.32453918457031, | |
| "learning_rate": 4.531629308053514e-06, | |
| "loss": 1.1003, | |
| "step": 133500 | |
| }, | |
| { | |
| "epoch": 17.052685161618733, | |
| "grad_norm": 91.62205505371094, | |
| "learning_rate": 4.5094799326658994e-06, | |
| "loss": 1.0447, | |
| "step": 134000 | |
| }, | |
| { | |
| "epoch": 17.116314583863577, | |
| "grad_norm": 49.810699462890625, | |
| "learning_rate": 4.487330557278285e-06, | |
| "loss": 1.036, | |
| "step": 134500 | |
| }, | |
| { | |
| "epoch": 17.179944006108425, | |
| "grad_norm": 64.78093719482422, | |
| "learning_rate": 4.465181181890671e-06, | |
| "loss": 1.0264, | |
| "step": 135000 | |
| }, | |
| { | |
| "epoch": 17.24357342835327, | |
| "grad_norm": 61.86587905883789, | |
| "learning_rate": 4.443031806503057e-06, | |
| "loss": 1.0375, | |
| "step": 135500 | |
| }, | |
| { | |
| "epoch": 17.307202850598117, | |
| "grad_norm": 57.83167266845703, | |
| "learning_rate": 4.420882431115443e-06, | |
| "loss": 1.0509, | |
| "step": 136000 | |
| }, | |
| { | |
| "epoch": 17.370832272842964, | |
| "grad_norm": 81.06472778320312, | |
| "learning_rate": 4.398733055727829e-06, | |
| "loss": 1.0452, | |
| "step": 136500 | |
| }, | |
| { | |
| "epoch": 17.43446169508781, | |
| "grad_norm": 77.34078979492188, | |
| "learning_rate": 4.376583680340215e-06, | |
| "loss": 1.0519, | |
| "step": 137000 | |
| }, | |
| { | |
| "epoch": 17.498091117332656, | |
| "grad_norm": 75.93341064453125, | |
| "learning_rate": 4.3544343049526005e-06, | |
| "loss": 1.0498, | |
| "step": 137500 | |
| }, | |
| { | |
| "epoch": 17.5617205395775, | |
| "grad_norm": 56.93536376953125, | |
| "learning_rate": 4.332284929564987e-06, | |
| "loss": 1.0514, | |
| "step": 138000 | |
| }, | |
| { | |
| "epoch": 17.625349961822348, | |
| "grad_norm": 63.56499481201172, | |
| "learning_rate": 4.310179852928148e-06, | |
| "loss": 1.054, | |
| "step": 138500 | |
| }, | |
| { | |
| "epoch": 17.688979384067192, | |
| "grad_norm": 71.97218322753906, | |
| "learning_rate": 4.288030477540534e-06, | |
| "loss": 1.0457, | |
| "step": 139000 | |
| }, | |
| { | |
| "epoch": 17.75260880631204, | |
| "grad_norm": 63.93644332885742, | |
| "learning_rate": 4.265925400903695e-06, | |
| "loss": 1.0582, | |
| "step": 139500 | |
| }, | |
| { | |
| "epoch": 17.816238228556884, | |
| "grad_norm": 60.03602981567383, | |
| "learning_rate": 4.243776025516081e-06, | |
| "loss": 1.0566, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 17.87986765080173, | |
| "grad_norm": 66.63105010986328, | |
| "learning_rate": 4.221626650128467e-06, | |
| "loss": 1.0644, | |
| "step": 140500 | |
| }, | |
| { | |
| "epoch": 17.943497073046576, | |
| "grad_norm": 66.42560577392578, | |
| "learning_rate": 4.199477274740853e-06, | |
| "loss": 1.0579, | |
| "step": 141000 | |
| }, | |
| { | |
| "epoch": 18.007126495291423, | |
| "grad_norm": 58.22215270996094, | |
| "learning_rate": 4.1773278993532385e-06, | |
| "loss": 1.0647, | |
| "step": 141500 | |
| }, | |
| { | |
| "epoch": 18.070755917536268, | |
| "grad_norm": 63.40778350830078, | |
| "learning_rate": 4.155178523965624e-06, | |
| "loss": 0.9704, | |
| "step": 142000 | |
| }, | |
| { | |
| "epoch": 18.134385339781115, | |
| "grad_norm": 67.59272003173828, | |
| "learning_rate": 4.1330291485780105e-06, | |
| "loss": 0.9787, | |
| "step": 142500 | |
| }, | |
| { | |
| "epoch": 18.19801476202596, | |
| "grad_norm": 58.86367416381836, | |
| "learning_rate": 4.110879773190396e-06, | |
| "loss": 0.9875, | |
| "step": 143000 | |
| }, | |
| { | |
| "epoch": 18.261644184270807, | |
| "grad_norm": 72.68678283691406, | |
| "learning_rate": 4.088730397802782e-06, | |
| "loss": 0.987, | |
| "step": 143500 | |
| }, | |
| { | |
| "epoch": 18.32527360651565, | |
| "grad_norm": 69.95580291748047, | |
| "learning_rate": 4.066581022415168e-06, | |
| "loss": 0.9834, | |
| "step": 144000 | |
| }, | |
| { | |
| "epoch": 18.3889030287605, | |
| "grad_norm": 63.809104919433594, | |
| "learning_rate": 4.044431647027554e-06, | |
| "loss": 0.999, | |
| "step": 144500 | |
| }, | |
| { | |
| "epoch": 18.452532451005347, | |
| "grad_norm": 76.64576721191406, | |
| "learning_rate": 4.02228227163994e-06, | |
| "loss": 0.9872, | |
| "step": 145000 | |
| }, | |
| { | |
| "epoch": 18.51616187325019, | |
| "grad_norm": 54.77001953125, | |
| "learning_rate": 4.000177195003101e-06, | |
| "loss": 0.9851, | |
| "step": 145500 | |
| }, | |
| { | |
| "epoch": 18.57979129549504, | |
| "grad_norm": 67.22696685791016, | |
| "learning_rate": 3.978027819615487e-06, | |
| "loss": 0.9986, | |
| "step": 146000 | |
| }, | |
| { | |
| "epoch": 18.643420717739883, | |
| "grad_norm": 69.88746643066406, | |
| "learning_rate": 3.955878444227873e-06, | |
| "loss": 0.9853, | |
| "step": 146500 | |
| }, | |
| { | |
| "epoch": 18.70705013998473, | |
| "grad_norm": 66.42214965820312, | |
| "learning_rate": 3.933729068840259e-06, | |
| "loss": 0.9973, | |
| "step": 147000 | |
| }, | |
| { | |
| "epoch": 18.770679562229574, | |
| "grad_norm": 75.5511245727539, | |
| "learning_rate": 3.9116682909541955e-06, | |
| "loss": 0.988, | |
| "step": 147500 | |
| }, | |
| { | |
| "epoch": 18.834308984474422, | |
| "grad_norm": 73.2605209350586, | |
| "learning_rate": 3.889518915566581e-06, | |
| "loss": 0.999, | |
| "step": 148000 | |
| }, | |
| { | |
| "epoch": 18.897938406719266, | |
| "grad_norm": 63.08274841308594, | |
| "learning_rate": 3.8673695401789675e-06, | |
| "loss": 0.9899, | |
| "step": 148500 | |
| }, | |
| { | |
| "epoch": 18.961567828964114, | |
| "grad_norm": 98.51166534423828, | |
| "learning_rate": 3.845220164791353e-06, | |
| "loss": 1.0053, | |
| "step": 149000 | |
| }, | |
| { | |
| "epoch": 19.025197251208958, | |
| "grad_norm": 67.6368408203125, | |
| "learning_rate": 3.823070789403739e-06, | |
| "loss": 0.9802, | |
| "step": 149500 | |
| }, | |
| { | |
| "epoch": 19.088826673453806, | |
| "grad_norm": 71.1702880859375, | |
| "learning_rate": 3.800921414016125e-06, | |
| "loss": 0.9301, | |
| "step": 150000 | |
| }, | |
| { | |
| "epoch": 19.15245609569865, | |
| "grad_norm": 74.88888549804688, | |
| "learning_rate": 3.778772038628511e-06, | |
| "loss": 0.9295, | |
| "step": 150500 | |
| }, | |
| { | |
| "epoch": 19.216085517943498, | |
| "grad_norm": 49.797691345214844, | |
| "learning_rate": 3.756622663240897e-06, | |
| "loss": 0.9334, | |
| "step": 151000 | |
| }, | |
| { | |
| "epoch": 19.27971494018834, | |
| "grad_norm": 49.12934875488281, | |
| "learning_rate": 3.734473287853283e-06, | |
| "loss": 0.9503, | |
| "step": 151500 | |
| }, | |
| { | |
| "epoch": 19.34334436243319, | |
| "grad_norm": 47.530452728271484, | |
| "learning_rate": 3.712323912465669e-06, | |
| "loss": 0.9161, | |
| "step": 152000 | |
| }, | |
| { | |
| "epoch": 19.406973784678033, | |
| "grad_norm": 69.1083984375, | |
| "learning_rate": 3.6901745370780546e-06, | |
| "loss": 0.9433, | |
| "step": 152500 | |
| }, | |
| { | |
| "epoch": 19.47060320692288, | |
| "grad_norm": 62.554718017578125, | |
| "learning_rate": 3.6680251616904407e-06, | |
| "loss": 0.9376, | |
| "step": 153000 | |
| }, | |
| { | |
| "epoch": 19.53423262916773, | |
| "grad_norm": 55.151100158691406, | |
| "learning_rate": 3.645920085053602e-06, | |
| "loss": 0.9274, | |
| "step": 153500 | |
| }, | |
| { | |
| "epoch": 19.597862051412573, | |
| "grad_norm": 60.6050910949707, | |
| "learning_rate": 3.623770709665988e-06, | |
| "loss": 0.9414, | |
| "step": 154000 | |
| }, | |
| { | |
| "epoch": 19.66149147365742, | |
| "grad_norm": 55.62131118774414, | |
| "learning_rate": 3.6016213342783736e-06, | |
| "loss": 0.94, | |
| "step": 154500 | |
| }, | |
| { | |
| "epoch": 19.725120895902265, | |
| "grad_norm": 59.69659423828125, | |
| "learning_rate": 3.5794719588907597e-06, | |
| "loss": 0.9344, | |
| "step": 155000 | |
| }, | |
| { | |
| "epoch": 19.788750318147112, | |
| "grad_norm": 46.444984436035156, | |
| "learning_rate": 3.557366882253921e-06, | |
| "loss": 0.9464, | |
| "step": 155500 | |
| }, | |
| { | |
| "epoch": 19.852379740391957, | |
| "grad_norm": 63.4849739074707, | |
| "learning_rate": 3.535217506866307e-06, | |
| "loss": 0.9583, | |
| "step": 156000 | |
| }, | |
| { | |
| "epoch": 19.916009162636804, | |
| "grad_norm": 69.28148651123047, | |
| "learning_rate": 3.5130681314786926e-06, | |
| "loss": 0.953, | |
| "step": 156500 | |
| }, | |
| { | |
| "epoch": 19.97963858488165, | |
| "grad_norm": 127.65802764892578, | |
| "learning_rate": 3.4909187560910782e-06, | |
| "loss": 0.9481, | |
| "step": 157000 | |
| }, | |
| { | |
| "epoch": 20.043268007126496, | |
| "grad_norm": 64.03028106689453, | |
| "learning_rate": 3.46881367945424e-06, | |
| "loss": 0.8982, | |
| "step": 157500 | |
| }, | |
| { | |
| "epoch": 20.10689742937134, | |
| "grad_norm": 68.30170440673828, | |
| "learning_rate": 3.446664304066625e-06, | |
| "loss": 0.8974, | |
| "step": 158000 | |
| }, | |
| { | |
| "epoch": 20.170526851616188, | |
| "grad_norm": 44.01250457763672, | |
| "learning_rate": 3.424514928679011e-06, | |
| "loss": 0.9022, | |
| "step": 158500 | |
| }, | |
| { | |
| "epoch": 20.234156273861032, | |
| "grad_norm": 65.26950073242188, | |
| "learning_rate": 3.4023655532913972e-06, | |
| "loss": 0.8923, | |
| "step": 159000 | |
| }, | |
| { | |
| "epoch": 20.29778569610588, | |
| "grad_norm": 46.552730560302734, | |
| "learning_rate": 3.380260476654559e-06, | |
| "loss": 0.8935, | |
| "step": 159500 | |
| }, | |
| { | |
| "epoch": 20.361415118350724, | |
| "grad_norm": 59.73283767700195, | |
| "learning_rate": 3.358111101266944e-06, | |
| "loss": 0.8917, | |
| "step": 160000 | |
| }, | |
| { | |
| "epoch": 20.42504454059557, | |
| "grad_norm": 84.19660186767578, | |
| "learning_rate": 3.33596172587933e-06, | |
| "loss": 0.9021, | |
| "step": 160500 | |
| }, | |
| { | |
| "epoch": 20.488673962840416, | |
| "grad_norm": 48.705833435058594, | |
| "learning_rate": 3.3138123504917162e-06, | |
| "loss": 0.8978, | |
| "step": 161000 | |
| }, | |
| { | |
| "epoch": 20.552303385085263, | |
| "grad_norm": 44.999656677246094, | |
| "learning_rate": 3.2916629751041023e-06, | |
| "loss": 0.9078, | |
| "step": 161500 | |
| }, | |
| { | |
| "epoch": 20.61593280733011, | |
| "grad_norm": 62.21163558959961, | |
| "learning_rate": 3.2695135997164883e-06, | |
| "loss": 0.903, | |
| "step": 162000 | |
| }, | |
| { | |
| "epoch": 20.679562229574955, | |
| "grad_norm": 61.40314483642578, | |
| "learning_rate": 3.247408523079649e-06, | |
| "loss": 0.8989, | |
| "step": 162500 | |
| }, | |
| { | |
| "epoch": 20.743191651819803, | |
| "grad_norm": 55.93895721435547, | |
| "learning_rate": 3.2252591476920352e-06, | |
| "loss": 0.9023, | |
| "step": 163000 | |
| }, | |
| { | |
| "epoch": 20.806821074064647, | |
| "grad_norm": 64.3861312866211, | |
| "learning_rate": 3.2031097723044213e-06, | |
| "loss": 0.8918, | |
| "step": 163500 | |
| }, | |
| { | |
| "epoch": 20.870450496309495, | |
| "grad_norm": 92.62686157226562, | |
| "learning_rate": 3.1809603969168073e-06, | |
| "loss": 0.8968, | |
| "step": 164000 | |
| }, | |
| { | |
| "epoch": 20.93407991855434, | |
| "grad_norm": 58.37923049926758, | |
| "learning_rate": 3.1588110215291934e-06, | |
| "loss": 0.8977, | |
| "step": 164500 | |
| }, | |
| { | |
| "epoch": 20.997709340799187, | |
| "grad_norm": 49.36125564575195, | |
| "learning_rate": 3.136661646141579e-06, | |
| "loss": 0.9035, | |
| "step": 165000 | |
| }, | |
| { | |
| "epoch": 21.06133876304403, | |
| "grad_norm": 69.00907135009766, | |
| "learning_rate": 3.114512270753965e-06, | |
| "loss": 0.8347, | |
| "step": 165500 | |
| }, | |
| { | |
| "epoch": 21.12496818528888, | |
| "grad_norm": 56.581600189208984, | |
| "learning_rate": 3.0924071941171263e-06, | |
| "loss": 0.8415, | |
| "step": 166000 | |
| }, | |
| { | |
| "epoch": 21.188597607533723, | |
| "grad_norm": 56.39949417114258, | |
| "learning_rate": 3.0702578187295124e-06, | |
| "loss": 0.8472, | |
| "step": 166500 | |
| }, | |
| { | |
| "epoch": 21.25222702977857, | |
| "grad_norm": 58.36819839477539, | |
| "learning_rate": 3.048108443341898e-06, | |
| "loss": 0.8663, | |
| "step": 167000 | |
| }, | |
| { | |
| "epoch": 21.315856452023414, | |
| "grad_norm": 59.97529602050781, | |
| "learning_rate": 3.025959067954284e-06, | |
| "loss": 0.8633, | |
| "step": 167500 | |
| }, | |
| { | |
| "epoch": 21.379485874268262, | |
| "grad_norm": 52.96846008300781, | |
| "learning_rate": 3.00380969256667e-06, | |
| "loss": 0.8569, | |
| "step": 168000 | |
| }, | |
| { | |
| "epoch": 21.443115296513106, | |
| "grad_norm": 52.254085540771484, | |
| "learning_rate": 2.981660317179056e-06, | |
| "loss": 0.8529, | |
| "step": 168500 | |
| }, | |
| { | |
| "epoch": 21.506744718757954, | |
| "grad_norm": 80.7918472290039, | |
| "learning_rate": 2.959555240542217e-06, | |
| "loss": 0.8485, | |
| "step": 169000 | |
| }, | |
| { | |
| "epoch": 21.570374141002798, | |
| "grad_norm": 59.65958023071289, | |
| "learning_rate": 2.9374058651546026e-06, | |
| "loss": 0.8759, | |
| "step": 169500 | |
| }, | |
| { | |
| "epoch": 21.634003563247646, | |
| "grad_norm": 48.33919906616211, | |
| "learning_rate": 2.9152564897669886e-06, | |
| "loss": 0.8667, | |
| "step": 170000 | |
| }, | |
| { | |
| "epoch": 21.697632985492493, | |
| "grad_norm": 68.83987426757812, | |
| "learning_rate": 2.8931071143793747e-06, | |
| "loss": 0.8615, | |
| "step": 170500 | |
| }, | |
| { | |
| "epoch": 21.761262407737338, | |
| "grad_norm": 42.605552673339844, | |
| "learning_rate": 2.8709577389917607e-06, | |
| "loss": 0.8623, | |
| "step": 171000 | |
| }, | |
| { | |
| "epoch": 21.824891829982185, | |
| "grad_norm": 57.37046432495117, | |
| "learning_rate": 2.8488083636041464e-06, | |
| "loss": 0.8613, | |
| "step": 171500 | |
| }, | |
| { | |
| "epoch": 21.88852125222703, | |
| "grad_norm": 66.89559173583984, | |
| "learning_rate": 2.8266589882165324e-06, | |
| "loss": 0.8515, | |
| "step": 172000 | |
| }, | |
| { | |
| "epoch": 21.952150674471877, | |
| "grad_norm": 53.939571380615234, | |
| "learning_rate": 2.8045096128289184e-06, | |
| "loss": 0.8615, | |
| "step": 172500 | |
| }, | |
| { | |
| "epoch": 22.01578009671672, | |
| "grad_norm": 61.67373275756836, | |
| "learning_rate": 2.7824045361920797e-06, | |
| "loss": 0.8457, | |
| "step": 173000 | |
| }, | |
| { | |
| "epoch": 22.07940951896157, | |
| "grad_norm": 71.31520080566406, | |
| "learning_rate": 2.7602551608044653e-06, | |
| "loss": 0.8106, | |
| "step": 173500 | |
| }, | |
| { | |
| "epoch": 22.143038941206413, | |
| "grad_norm": 44.70698165893555, | |
| "learning_rate": 2.7381057854168514e-06, | |
| "loss": 0.8109, | |
| "step": 174000 | |
| }, | |
| { | |
| "epoch": 22.20666836345126, | |
| "grad_norm": 43.95622253417969, | |
| "learning_rate": 2.7159564100292374e-06, | |
| "loss": 0.8108, | |
| "step": 174500 | |
| }, | |
| { | |
| "epoch": 22.270297785696105, | |
| "grad_norm": 55.156822204589844, | |
| "learning_rate": 2.6938513333923987e-06, | |
| "loss": 0.8197, | |
| "step": 175000 | |
| }, | |
| { | |
| "epoch": 22.333927207940953, | |
| "grad_norm": 85.59542846679688, | |
| "learning_rate": 2.6717019580047843e-06, | |
| "loss": 0.8165, | |
| "step": 175500 | |
| }, | |
| { | |
| "epoch": 22.397556630185797, | |
| "grad_norm": 65.07913208007812, | |
| "learning_rate": 2.6495525826171704e-06, | |
| "loss": 0.8289, | |
| "step": 176000 | |
| }, | |
| { | |
| "epoch": 22.461186052430644, | |
| "grad_norm": 65.89120483398438, | |
| "learning_rate": 2.6274032072295564e-06, | |
| "loss": 0.8288, | |
| "step": 176500 | |
| }, | |
| { | |
| "epoch": 22.52481547467549, | |
| "grad_norm": 55.914939880371094, | |
| "learning_rate": 2.6052981305927177e-06, | |
| "loss": 0.8145, | |
| "step": 177000 | |
| }, | |
| { | |
| "epoch": 22.588444896920336, | |
| "grad_norm": 80.44625854492188, | |
| "learning_rate": 2.5831487552051033e-06, | |
| "loss": 0.8249, | |
| "step": 177500 | |
| }, | |
| { | |
| "epoch": 22.65207431916518, | |
| "grad_norm": 65.78691101074219, | |
| "learning_rate": 2.5609993798174894e-06, | |
| "loss": 0.8218, | |
| "step": 178000 | |
| }, | |
| { | |
| "epoch": 22.715703741410028, | |
| "grad_norm": 41.24105453491211, | |
| "learning_rate": 2.5388500044298754e-06, | |
| "loss": 0.8284, | |
| "step": 178500 | |
| }, | |
| { | |
| "epoch": 22.779333163654876, | |
| "grad_norm": 64.46809387207031, | |
| "learning_rate": 2.5167892265438115e-06, | |
| "loss": 0.833, | |
| "step": 179000 | |
| }, | |
| { | |
| "epoch": 22.84296258589972, | |
| "grad_norm": 56.47655487060547, | |
| "learning_rate": 2.4946398511561976e-06, | |
| "loss": 0.8176, | |
| "step": 179500 | |
| }, | |
| { | |
| "epoch": 22.906592008144568, | |
| "grad_norm": 71.83133697509766, | |
| "learning_rate": 2.4724904757685836e-06, | |
| "loss": 0.8431, | |
| "step": 180000 | |
| }, | |
| { | |
| "epoch": 22.97022143038941, | |
| "grad_norm": 53.66551971435547, | |
| "learning_rate": 2.450385399131745e-06, | |
| "loss": 0.8234, | |
| "step": 180500 | |
| }, | |
| { | |
| "epoch": 23.03385085263426, | |
| "grad_norm": 78.90325927734375, | |
| "learning_rate": 2.4282360237441305e-06, | |
| "loss": 0.7998, | |
| "step": 181000 | |
| }, | |
| { | |
| "epoch": 23.097480274879103, | |
| "grad_norm": 64.31370544433594, | |
| "learning_rate": 2.4060866483565166e-06, | |
| "loss": 0.7821, | |
| "step": 181500 | |
| }, | |
| { | |
| "epoch": 23.16110969712395, | |
| "grad_norm": 48.68030548095703, | |
| "learning_rate": 2.3839372729689026e-06, | |
| "loss": 0.7914, | |
| "step": 182000 | |
| }, | |
| { | |
| "epoch": 23.224739119368795, | |
| "grad_norm": 52.06983184814453, | |
| "learning_rate": 2.3617878975812882e-06, | |
| "loss": 0.7851, | |
| "step": 182500 | |
| }, | |
| { | |
| "epoch": 23.288368541613643, | |
| "grad_norm": 50.310157775878906, | |
| "learning_rate": 2.3396385221936743e-06, | |
| "loss": 0.7797, | |
| "step": 183000 | |
| }, | |
| { | |
| "epoch": 23.351997963858487, | |
| "grad_norm": 52.41871643066406, | |
| "learning_rate": 2.3174891468060603e-06, | |
| "loss": 0.7931, | |
| "step": 183500 | |
| }, | |
| { | |
| "epoch": 23.415627386103335, | |
| "grad_norm": 88.78260040283203, | |
| "learning_rate": 2.295339771418446e-06, | |
| "loss": 0.7912, | |
| "step": 184000 | |
| }, | |
| { | |
| "epoch": 23.47925680834818, | |
| "grad_norm": 62.528663635253906, | |
| "learning_rate": 2.273190396030832e-06, | |
| "loss": 0.7876, | |
| "step": 184500 | |
| }, | |
| { | |
| "epoch": 23.542886230593027, | |
| "grad_norm": 46.27097702026367, | |
| "learning_rate": 2.251041020643218e-06, | |
| "loss": 0.7954, | |
| "step": 185000 | |
| }, | |
| { | |
| "epoch": 23.60651565283787, | |
| "grad_norm": 50.20694351196289, | |
| "learning_rate": 2.228891645255604e-06, | |
| "loss": 0.7946, | |
| "step": 185500 | |
| }, | |
| { | |
| "epoch": 23.67014507508272, | |
| "grad_norm": 56.892765045166016, | |
| "learning_rate": 2.20674226986799e-06, | |
| "loss": 0.7782, | |
| "step": 186000 | |
| }, | |
| { | |
| "epoch": 23.733774497327563, | |
| "grad_norm": 41.52644729614258, | |
| "learning_rate": 2.184637193231151e-06, | |
| "loss": 0.7952, | |
| "step": 186500 | |
| }, | |
| { | |
| "epoch": 23.79740391957241, | |
| "grad_norm": 58.025516510009766, | |
| "learning_rate": 2.162487817843537e-06, | |
| "loss": 0.8015, | |
| "step": 187000 | |
| }, | |
| { | |
| "epoch": 23.861033341817258, | |
| "grad_norm": 48.62569046020508, | |
| "learning_rate": 2.140338442455923e-06, | |
| "loss": 0.7977, | |
| "step": 187500 | |
| }, | |
| { | |
| "epoch": 23.924662764062102, | |
| "grad_norm": 46.91473388671875, | |
| "learning_rate": 2.1181890670683087e-06, | |
| "loss": 0.7875, | |
| "step": 188000 | |
| }, | |
| { | |
| "epoch": 23.98829218630695, | |
| "grad_norm": 52.42847442626953, | |
| "learning_rate": 2.09608399043147e-06, | |
| "loss": 0.7935, | |
| "step": 188500 | |
| }, | |
| { | |
| "epoch": 24.051921608551794, | |
| "grad_norm": 76.6783676147461, | |
| "learning_rate": 2.073934615043856e-06, | |
| "loss": 0.7617, | |
| "step": 189000 | |
| }, | |
| { | |
| "epoch": 24.11555103079664, | |
| "grad_norm": 67.17424011230469, | |
| "learning_rate": 2.0517852396562417e-06, | |
| "loss": 0.7625, | |
| "step": 189500 | |
| }, | |
| { | |
| "epoch": 24.179180453041486, | |
| "grad_norm": 50.021053314208984, | |
| "learning_rate": 2.0296358642686277e-06, | |
| "loss": 0.7514, | |
| "step": 190000 | |
| }, | |
| { | |
| "epoch": 24.242809875286333, | |
| "grad_norm": 53.048465728759766, | |
| "learning_rate": 2.0074864888810137e-06, | |
| "loss": 0.7662, | |
| "step": 190500 | |
| }, | |
| { | |
| "epoch": 24.306439297531178, | |
| "grad_norm": 67.73706817626953, | |
| "learning_rate": 1.9854257109949503e-06, | |
| "loss": 0.7692, | |
| "step": 191000 | |
| }, | |
| { | |
| "epoch": 24.370068719776025, | |
| "grad_norm": 57.47793960571289, | |
| "learning_rate": 1.9632763356073363e-06, | |
| "loss": 0.7733, | |
| "step": 191500 | |
| }, | |
| { | |
| "epoch": 24.43369814202087, | |
| "grad_norm": 59.039405822753906, | |
| "learning_rate": 1.941126960219722e-06, | |
| "loss": 0.7561, | |
| "step": 192000 | |
| }, | |
| { | |
| "epoch": 24.497327564265717, | |
| "grad_norm": 46.10505676269531, | |
| "learning_rate": 1.918977584832108e-06, | |
| "loss": 0.7577, | |
| "step": 192500 | |
| }, | |
| { | |
| "epoch": 24.56095698651056, | |
| "grad_norm": 85.20184326171875, | |
| "learning_rate": 1.8968282094444936e-06, | |
| "loss": 0.7687, | |
| "step": 193000 | |
| }, | |
| { | |
| "epoch": 24.62458640875541, | |
| "grad_norm": 53.42023849487305, | |
| "learning_rate": 1.8746788340568796e-06, | |
| "loss": 0.7647, | |
| "step": 193500 | |
| }, | |
| { | |
| "epoch": 24.688215831000253, | |
| "grad_norm": 63.070919036865234, | |
| "learning_rate": 1.852573757420041e-06, | |
| "loss": 0.7717, | |
| "step": 194000 | |
| }, | |
| { | |
| "epoch": 24.7518452532451, | |
| "grad_norm": 59.02709197998047, | |
| "learning_rate": 1.830424382032427e-06, | |
| "loss": 0.761, | |
| "step": 194500 | |
| }, | |
| { | |
| "epoch": 24.815474675489945, | |
| "grad_norm": 47.43505859375, | |
| "learning_rate": 1.8082750066448126e-06, | |
| "loss": 0.7661, | |
| "step": 195000 | |
| }, | |
| { | |
| "epoch": 24.879104097734793, | |
| "grad_norm": 79.37848663330078, | |
| "learning_rate": 1.7861256312571986e-06, | |
| "loss": 0.7446, | |
| "step": 195500 | |
| }, | |
| { | |
| "epoch": 24.94273351997964, | |
| "grad_norm": 51.29045104980469, | |
| "learning_rate": 1.7639762558695847e-06, | |
| "loss": 0.7659, | |
| "step": 196000 | |
| }, | |
| { | |
| "epoch": 25.006362942224484, | |
| "grad_norm": 43.27066421508789, | |
| "learning_rate": 1.7418711792327458e-06, | |
| "loss": 0.7559, | |
| "step": 196500 | |
| }, | |
| { | |
| "epoch": 25.069992364469332, | |
| "grad_norm": 45.82556915283203, | |
| "learning_rate": 1.7197218038451316e-06, | |
| "loss": 0.7183, | |
| "step": 197000 | |
| }, | |
| { | |
| "epoch": 25.133621786714176, | |
| "grad_norm": 61.48518753051758, | |
| "learning_rate": 1.6975724284575176e-06, | |
| "loss": 0.7399, | |
| "step": 197500 | |
| }, | |
| { | |
| "epoch": 25.197251208959024, | |
| "grad_norm": 56.30770492553711, | |
| "learning_rate": 1.6754230530699037e-06, | |
| "loss": 0.7308, | |
| "step": 198000 | |
| }, | |
| { | |
| "epoch": 25.260880631203868, | |
| "grad_norm": 77.96941375732422, | |
| "learning_rate": 1.6532736776822895e-06, | |
| "loss": 0.733, | |
| "step": 198500 | |
| }, | |
| { | |
| "epoch": 25.324510053448716, | |
| "grad_norm": 75.03907775878906, | |
| "learning_rate": 1.6311243022946753e-06, | |
| "loss": 0.746, | |
| "step": 199000 | |
| }, | |
| { | |
| "epoch": 25.38813947569356, | |
| "grad_norm": 49.624366760253906, | |
| "learning_rate": 1.6089749269070614e-06, | |
| "loss": 0.7274, | |
| "step": 199500 | |
| }, | |
| { | |
| "epoch": 25.451768897938408, | |
| "grad_norm": 54.31991195678711, | |
| "learning_rate": 1.5868255515194472e-06, | |
| "loss": 0.7358, | |
| "step": 200000 | |
| }, | |
| { | |
| "epoch": 25.51539832018325, | |
| "grad_norm": 57.31879425048828, | |
| "learning_rate": 1.5646761761318333e-06, | |
| "loss": 0.7468, | |
| "step": 200500 | |
| }, | |
| { | |
| "epoch": 25.5790277424281, | |
| "grad_norm": 51.115596771240234, | |
| "learning_rate": 1.5425710994949943e-06, | |
| "loss": 0.734, | |
| "step": 201000 | |
| }, | |
| { | |
| "epoch": 25.642657164672944, | |
| "grad_norm": 68.8400650024414, | |
| "learning_rate": 1.5204660228581556e-06, | |
| "loss": 0.7493, | |
| "step": 201500 | |
| }, | |
| { | |
| "epoch": 25.70628658691779, | |
| "grad_norm": 40.318153381347656, | |
| "learning_rate": 1.4983166474705415e-06, | |
| "loss": 0.7263, | |
| "step": 202000 | |
| }, | |
| { | |
| "epoch": 25.769916009162635, | |
| "grad_norm": 63.84051513671875, | |
| "learning_rate": 1.4761672720829273e-06, | |
| "loss": 0.7355, | |
| "step": 202500 | |
| }, | |
| { | |
| "epoch": 25.833545431407483, | |
| "grad_norm": 61.53810501098633, | |
| "learning_rate": 1.4540178966953133e-06, | |
| "loss": 0.745, | |
| "step": 203000 | |
| }, | |
| { | |
| "epoch": 25.897174853652327, | |
| "grad_norm": 51.513187408447266, | |
| "learning_rate": 1.4318685213076994e-06, | |
| "loss": 0.7301, | |
| "step": 203500 | |
| }, | |
| { | |
| "epoch": 25.960804275897175, | |
| "grad_norm": 65.9397201538086, | |
| "learning_rate": 1.4097191459200852e-06, | |
| "loss": 0.7457, | |
| "step": 204000 | |
| }, | |
| { | |
| "epoch": 26.024433698142023, | |
| "grad_norm": 59.864540100097656, | |
| "learning_rate": 1.3875697705324713e-06, | |
| "loss": 0.7072, | |
| "step": 204500 | |
| }, | |
| { | |
| "epoch": 26.088063120386867, | |
| "grad_norm": 52.43559646606445, | |
| "learning_rate": 1.3654203951448569e-06, | |
| "loss": 0.7212, | |
| "step": 205000 | |
| }, | |
| { | |
| "epoch": 26.151692542631714, | |
| "grad_norm": 45.977447509765625, | |
| "learning_rate": 1.343315318508018e-06, | |
| "loss": 0.7186, | |
| "step": 205500 | |
| }, | |
| { | |
| "epoch": 26.21532196487656, | |
| "grad_norm": 48.874996185302734, | |
| "learning_rate": 1.321165943120404e-06, | |
| "loss": 0.7225, | |
| "step": 206000 | |
| }, | |
| { | |
| "epoch": 26.278951387121406, | |
| "grad_norm": 57.50956344604492, | |
| "learning_rate": 1.29901656773279e-06, | |
| "loss": 0.7065, | |
| "step": 206500 | |
| }, | |
| { | |
| "epoch": 26.34258080936625, | |
| "grad_norm": 64.73326110839844, | |
| "learning_rate": 1.2768671923451759e-06, | |
| "loss": 0.7153, | |
| "step": 207000 | |
| }, | |
| { | |
| "epoch": 26.406210231611098, | |
| "grad_norm": 53.96969223022461, | |
| "learning_rate": 1.254762115708337e-06, | |
| "loss": 0.72, | |
| "step": 207500 | |
| }, | |
| { | |
| "epoch": 26.469839653855942, | |
| "grad_norm": 56.118255615234375, | |
| "learning_rate": 1.232612740320723e-06, | |
| "loss": 0.7074, | |
| "step": 208000 | |
| }, | |
| { | |
| "epoch": 26.53346907610079, | |
| "grad_norm": 57.4562873840332, | |
| "learning_rate": 1.2104633649331088e-06, | |
| "loss": 0.7117, | |
| "step": 208500 | |
| }, | |
| { | |
| "epoch": 26.597098498345634, | |
| "grad_norm": 60.367919921875, | |
| "learning_rate": 1.1883139895454949e-06, | |
| "loss": 0.7206, | |
| "step": 209000 | |
| }, | |
| { | |
| "epoch": 26.66072792059048, | |
| "grad_norm": 55.18882369995117, | |
| "learning_rate": 1.166164614157881e-06, | |
| "loss": 0.7132, | |
| "step": 209500 | |
| }, | |
| { | |
| "epoch": 26.724357342835326, | |
| "grad_norm": 48.3643798828125, | |
| "learning_rate": 1.144059537521042e-06, | |
| "loss": 0.7199, | |
| "step": 210000 | |
| }, | |
| { | |
| "epoch": 26.787986765080174, | |
| "grad_norm": 50.825225830078125, | |
| "learning_rate": 1.1219101621334278e-06, | |
| "loss": 0.7102, | |
| "step": 210500 | |
| }, | |
| { | |
| "epoch": 26.851616187325018, | |
| "grad_norm": 36.502899169921875, | |
| "learning_rate": 1.0997607867458139e-06, | |
| "loss": 0.7155, | |
| "step": 211000 | |
| }, | |
| { | |
| "epoch": 26.915245609569865, | |
| "grad_norm": 58.10041809082031, | |
| "learning_rate": 1.0776114113581997e-06, | |
| "loss": 0.7057, | |
| "step": 211500 | |
| }, | |
| { | |
| "epoch": 26.97887503181471, | |
| "grad_norm": 41.11175537109375, | |
| "learning_rate": 1.055506334721361e-06, | |
| "loss": 0.7191, | |
| "step": 212000 | |
| }, | |
| { | |
| "epoch": 27.042504454059557, | |
| "grad_norm": 56.8629150390625, | |
| "learning_rate": 1.0333569593337468e-06, | |
| "loss": 0.6942, | |
| "step": 212500 | |
| }, | |
| { | |
| "epoch": 27.106133876304405, | |
| "grad_norm": 43.03855514526367, | |
| "learning_rate": 1.011251882696908e-06, | |
| "loss": 0.6924, | |
| "step": 213000 | |
| }, | |
| { | |
| "epoch": 27.16976329854925, | |
| "grad_norm": 41.03914260864258, | |
| "learning_rate": 9.89102507309294e-07, | |
| "loss": 0.7025, | |
| "step": 213500 | |
| }, | |
| { | |
| "epoch": 27.233392720794097, | |
| "grad_norm": 44.40423583984375, | |
| "learning_rate": 9.6695313192168e-07, | |
| "loss": 0.6911, | |
| "step": 214000 | |
| }, | |
| { | |
| "epoch": 27.29702214303894, | |
| "grad_norm": 48.28982925415039, | |
| "learning_rate": 9.448037565340658e-07, | |
| "loss": 0.6955, | |
| "step": 214500 | |
| }, | |
| { | |
| "epoch": 27.36065156528379, | |
| "grad_norm": 58.85805130004883, | |
| "learning_rate": 9.226543811464518e-07, | |
| "loss": 0.6875, | |
| "step": 215000 | |
| }, | |
| { | |
| "epoch": 27.424280987528633, | |
| "grad_norm": 58.64131164550781, | |
| "learning_rate": 9.005050057588377e-07, | |
| "loss": 0.698, | |
| "step": 215500 | |
| }, | |
| { | |
| "epoch": 27.48791040977348, | |
| "grad_norm": 54.10153579711914, | |
| "learning_rate": 8.783999291219989e-07, | |
| "loss": 0.7054, | |
| "step": 216000 | |
| }, | |
| { | |
| "epoch": 27.551539832018324, | |
| "grad_norm": 37.60294723510742, | |
| "learning_rate": 8.562505537343847e-07, | |
| "loss": 0.6968, | |
| "step": 216500 | |
| }, | |
| { | |
| "epoch": 27.615169254263172, | |
| "grad_norm": 46.13175964355469, | |
| "learning_rate": 8.341011783467707e-07, | |
| "loss": 0.7044, | |
| "step": 217000 | |
| }, | |
| { | |
| "epoch": 27.678798676508016, | |
| "grad_norm": 49.83407211303711, | |
| "learning_rate": 8.119518029591567e-07, | |
| "loss": 0.6946, | |
| "step": 217500 | |
| }, | |
| { | |
| "epoch": 27.742428098752864, | |
| "grad_norm": 62.65508270263672, | |
| "learning_rate": 7.898024275715425e-07, | |
| "loss": 0.6865, | |
| "step": 218000 | |
| }, | |
| { | |
| "epoch": 27.806057520997708, | |
| "grad_norm": 64.78981018066406, | |
| "learning_rate": 7.676530521839285e-07, | |
| "loss": 0.6974, | |
| "step": 218500 | |
| }, | |
| { | |
| "epoch": 27.869686943242556, | |
| "grad_norm": 55.65605926513672, | |
| "learning_rate": 7.455479755470895e-07, | |
| "loss": 0.698, | |
| "step": 219000 | |
| }, | |
| { | |
| "epoch": 27.9333163654874, | |
| "grad_norm": 51.40291976928711, | |
| "learning_rate": 7.233986001594756e-07, | |
| "loss": 0.6943, | |
| "step": 219500 | |
| }, | |
| { | |
| "epoch": 27.996945787732248, | |
| "grad_norm": 52.821475982666016, | |
| "learning_rate": 7.012492247718615e-07, | |
| "loss": 0.6985, | |
| "step": 220000 | |
| }, | |
| { | |
| "epoch": 28.06057520997709, | |
| "grad_norm": 105.65634155273438, | |
| "learning_rate": 6.790998493842474e-07, | |
| "loss": 0.6785, | |
| "step": 220500 | |
| }, | |
| { | |
| "epoch": 28.12420463222194, | |
| "grad_norm": 66.97595977783203, | |
| "learning_rate": 6.569504739966333e-07, | |
| "loss": 0.6842, | |
| "step": 221000 | |
| }, | |
| { | |
| "epoch": 28.187834054466787, | |
| "grad_norm": 77.8376693725586, | |
| "learning_rate": 6.348010986090193e-07, | |
| "loss": 0.6832, | |
| "step": 221500 | |
| }, | |
| { | |
| "epoch": 28.25146347671163, | |
| "grad_norm": 68.83918762207031, | |
| "learning_rate": 6.126517232214052e-07, | |
| "loss": 0.6863, | |
| "step": 222000 | |
| }, | |
| { | |
| "epoch": 28.31509289895648, | |
| "grad_norm": 49.16581344604492, | |
| "learning_rate": 5.905023478337911e-07, | |
| "loss": 0.6806, | |
| "step": 222500 | |
| }, | |
| { | |
| "epoch": 28.378722321201323, | |
| "grad_norm": 58.93035888671875, | |
| "learning_rate": 5.683972711969523e-07, | |
| "loss": 0.6897, | |
| "step": 223000 | |
| }, | |
| { | |
| "epoch": 28.44235174344617, | |
| "grad_norm": 57.476531982421875, | |
| "learning_rate": 5.462478958093382e-07, | |
| "loss": 0.6975, | |
| "step": 223500 | |
| }, | |
| { | |
| "epoch": 28.505981165691015, | |
| "grad_norm": 56.7477912902832, | |
| "learning_rate": 5.240985204217242e-07, | |
| "loss": 0.6802, | |
| "step": 224000 | |
| }, | |
| { | |
| "epoch": 28.569610587935863, | |
| "grad_norm": 45.01617431640625, | |
| "learning_rate": 5.0194914503411e-07, | |
| "loss": 0.6836, | |
| "step": 224500 | |
| }, | |
| { | |
| "epoch": 28.633240010180707, | |
| "grad_norm": 49.77652359008789, | |
| "learning_rate": 4.79799769646496e-07, | |
| "loss": 0.6849, | |
| "step": 225000 | |
| }, | |
| { | |
| "epoch": 28.696869432425554, | |
| "grad_norm": 52.57892990112305, | |
| "learning_rate": 4.57650394258882e-07, | |
| "loss": 0.6781, | |
| "step": 225500 | |
| }, | |
| { | |
| "epoch": 28.7604988546704, | |
| "grad_norm": 64.97437286376953, | |
| "learning_rate": 4.3550101887126787e-07, | |
| "loss": 0.6761, | |
| "step": 226000 | |
| }, | |
| { | |
| "epoch": 28.824128276915246, | |
| "grad_norm": 52.035160064697266, | |
| "learning_rate": 4.133516434836538e-07, | |
| "loss": 0.6762, | |
| "step": 226500 | |
| }, | |
| { | |
| "epoch": 28.88775769916009, | |
| "grad_norm": 57.393035888671875, | |
| "learning_rate": 3.912022680960397e-07, | |
| "loss": 0.6781, | |
| "step": 227000 | |
| }, | |
| { | |
| "epoch": 28.951387121404938, | |
| "grad_norm": 49.78774642944336, | |
| "learning_rate": 3.691414902099761e-07, | |
| "loss": 0.682, | |
| "step": 227500 | |
| }, | |
| { | |
| "epoch": 29.015016543649782, | |
| "grad_norm": 47.4661750793457, | |
| "learning_rate": 3.4699211482236206e-07, | |
| "loss": 0.6742, | |
| "step": 228000 | |
| }, | |
| { | |
| "epoch": 29.07864596589463, | |
| "grad_norm": 69.56925964355469, | |
| "learning_rate": 3.248870381855232e-07, | |
| "loss": 0.6595, | |
| "step": 228500 | |
| }, | |
| { | |
| "epoch": 29.142275388139474, | |
| "grad_norm": 49.844520568847656, | |
| "learning_rate": 3.027376627979091e-07, | |
| "loss": 0.683, | |
| "step": 229000 | |
| }, | |
| { | |
| "epoch": 29.20590481038432, | |
| "grad_norm": 58.6362419128418, | |
| "learning_rate": 2.8058828741029506e-07, | |
| "loss": 0.6721, | |
| "step": 229500 | |
| }, | |
| { | |
| "epoch": 29.26953423262917, | |
| "grad_norm": 44.214717864990234, | |
| "learning_rate": 2.5843891202268095e-07, | |
| "loss": 0.669, | |
| "step": 230000 | |
| }, | |
| { | |
| "epoch": 29.333163654874014, | |
| "grad_norm": 50.08256530761719, | |
| "learning_rate": 2.3628953663506691e-07, | |
| "loss": 0.683, | |
| "step": 230500 | |
| }, | |
| { | |
| "epoch": 29.39679307711886, | |
| "grad_norm": 51.15972900390625, | |
| "learning_rate": 2.1414016124745283e-07, | |
| "loss": 0.6652, | |
| "step": 231000 | |
| }, | |
| { | |
| "epoch": 29.460422499363705, | |
| "grad_norm": 47.7255859375, | |
| "learning_rate": 1.92035084610614e-07, | |
| "loss": 0.671, | |
| "step": 231500 | |
| }, | |
| { | |
| "epoch": 29.524051921608553, | |
| "grad_norm": 45.42967987060547, | |
| "learning_rate": 1.6988570922299992e-07, | |
| "loss": 0.6662, | |
| "step": 232000 | |
| }, | |
| { | |
| "epoch": 29.587681343853397, | |
| "grad_norm": 47.5881462097168, | |
| "learning_rate": 1.4773633383538586e-07, | |
| "loss": 0.6665, | |
| "step": 232500 | |
| }, | |
| { | |
| "epoch": 29.651310766098245, | |
| "grad_norm": 71.63655090332031, | |
| "learning_rate": 1.2558695844777177e-07, | |
| "loss": 0.6718, | |
| "step": 233000 | |
| }, | |
| { | |
| "epoch": 29.71494018834309, | |
| "grad_norm": 45.697998046875, | |
| "learning_rate": 1.0343758306015771e-07, | |
| "loss": 0.6657, | |
| "step": 233500 | |
| }, | |
| { | |
| "epoch": 29.778569610587937, | |
| "grad_norm": 58.17982864379883, | |
| "learning_rate": 8.128820767254363e-08, | |
| "loss": 0.6677, | |
| "step": 234000 | |
| }, | |
| { | |
| "epoch": 29.84219903283278, | |
| "grad_norm": 46.64686965942383, | |
| "learning_rate": 5.9138832284929565e-08, | |
| "loss": 0.6732, | |
| "step": 234500 | |
| }, | |
| { | |
| "epoch": 29.90582845507763, | |
| "grad_norm": 52.96521759033203, | |
| "learning_rate": 3.69894568973155e-08, | |
| "loss": 0.6687, | |
| "step": 235000 | |
| }, | |
| { | |
| "epoch": 29.969457877322473, | |
| "grad_norm": 44.063079833984375, | |
| "learning_rate": 1.4840081509701428e-08, | |
| "loss": 0.6732, | |
| "step": 235500 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 235740, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 30, | |
| "save_steps": 10000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |