| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9991254154276719, | |
| "eval_steps": 500, | |
| "global_step": 1071, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.004664451052416769, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": 0.8476, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.009328902104833538, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": 0.861, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.013993353157250307, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": 0.8574, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.018657804209667075, | |
| "grad_norm": 1.7217303169710205, | |
| "learning_rate": 3.7037037037037037e-06, | |
| "loss": 0.8586, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.023322255262083844, | |
| "grad_norm": 1.0811304401552388, | |
| "learning_rate": 8.333333333333334e-06, | |
| "loss": 0.8027, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.027986706314500613, | |
| "grad_norm": 0.6678730431826253, | |
| "learning_rate": 1.2962962962962962e-05, | |
| "loss": 0.7386, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.03265115736691738, | |
| "grad_norm": 0.49414971424324633, | |
| "learning_rate": 1.7592592592592595e-05, | |
| "loss": 0.6911, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.03731560841933415, | |
| "grad_norm": 0.4956290038506038, | |
| "learning_rate": 2.2222222222222223e-05, | |
| "loss": 0.6738, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.041980059471750916, | |
| "grad_norm": 0.45113968047451636, | |
| "learning_rate": 2.6851851851851855e-05, | |
| "loss": 0.6615, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.04664451052416769, | |
| "grad_norm": 0.4151578102626258, | |
| "learning_rate": 3.148148148148148e-05, | |
| "loss": 0.637, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.051308961576584454, | |
| "grad_norm": 0.41598749293660114, | |
| "learning_rate": 3.611111111111111e-05, | |
| "loss": 0.6521, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.055973412629001226, | |
| "grad_norm": 0.44364992697109346, | |
| "learning_rate": 4.074074074074074e-05, | |
| "loss": 0.6416, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.06063786368141799, | |
| "grad_norm": 0.4334859115222668, | |
| "learning_rate": 4.5370370370370374e-05, | |
| "loss": 0.6179, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.06530231473383476, | |
| "grad_norm": 0.4168421218480663, | |
| "learning_rate": 5e-05, | |
| "loss": 0.6455, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.06996676578625154, | |
| "grad_norm": 0.5598377478471296, | |
| "learning_rate": 4.999731625198103e-05, | |
| "loss": 0.6218, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.0746312168386683, | |
| "grad_norm": 0.5562791478235322, | |
| "learning_rate": 4.998926564814665e-05, | |
| "loss": 0.6164, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.07929566789108507, | |
| "grad_norm": 0.494836009809662, | |
| "learning_rate": 4.997585010901171e-05, | |
| "loss": 0.6083, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.08396011894350183, | |
| "grad_norm": 0.47761536307054875, | |
| "learning_rate": 4.995707283492525e-05, | |
| "loss": 0.597, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.08862456999591861, | |
| "grad_norm": 0.5012276873640602, | |
| "learning_rate": 4.9932938305306997e-05, | |
| "loss": 0.5986, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.09328902104833538, | |
| "grad_norm": 0.558780335812584, | |
| "learning_rate": 4.990345227757883e-05, | |
| "loss": 0.6108, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.09795347210075214, | |
| "grad_norm": 0.5027530843851625, | |
| "learning_rate": 4.986862178579129e-05, | |
| "loss": 0.616, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.10261792315316891, | |
| "grad_norm": 0.5276174777450547, | |
| "learning_rate": 4.982845513894555e-05, | |
| "loss": 0.6028, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.10728237420558567, | |
| "grad_norm": 0.48105561141886855, | |
| "learning_rate": 4.9782961919011294e-05, | |
| "loss": 0.5926, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.11194682525800245, | |
| "grad_norm": 0.5884754531701646, | |
| "learning_rate": 4.973215297864088e-05, | |
| "loss": 0.5997, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.11661127631041922, | |
| "grad_norm": 0.5038947888532336, | |
| "learning_rate": 4.967604043858034e-05, | |
| "loss": 0.5966, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.12127572736283598, | |
| "grad_norm": 0.4628854625972218, | |
| "learning_rate": 4.9614637684777964e-05, | |
| "loss": 0.5889, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.12594017841525276, | |
| "grad_norm": 0.369569834468325, | |
| "learning_rate": 4.954795936519099e-05, | |
| "loss": 0.5886, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.13060462946766951, | |
| "grad_norm": 0.510224312267307, | |
| "learning_rate": 4.9476021386291255e-05, | |
| "loss": 0.5904, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.1352690805200863, | |
| "grad_norm": 0.5165955737252187, | |
| "learning_rate": 4.93988409092706e-05, | |
| "loss": 0.5931, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.13993353157250307, | |
| "grad_norm": 0.45509182070489973, | |
| "learning_rate": 4.931643634594701e-05, | |
| "loss": 0.5764, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.14459798262491982, | |
| "grad_norm": 0.41968982538759186, | |
| "learning_rate": 4.9228827354372374e-05, | |
| "loss": 0.5809, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.1492624336773366, | |
| "grad_norm": 0.41229365733365814, | |
| "learning_rate": 4.9136034834142906e-05, | |
| "loss": 0.5886, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.15392688472975335, | |
| "grad_norm": 0.3826932795909075, | |
| "learning_rate": 4.9038080921413485e-05, | |
| "loss": 0.5842, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.15859133578217013, | |
| "grad_norm": 0.41134892045101074, | |
| "learning_rate": 4.893498898361693e-05, | |
| "loss": 0.5946, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.1632557868345869, | |
| "grad_norm": 0.38992267173979334, | |
| "learning_rate": 4.882678361388958e-05, | |
| "loss": 0.5885, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.16792023788700366, | |
| "grad_norm": 0.40073952235248766, | |
| "learning_rate": 4.871349062520448e-05, | |
| "loss": 0.5841, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.17258468893942044, | |
| "grad_norm": 0.4200184545679776, | |
| "learning_rate": 4.859513704421353e-05, | |
| "loss": 0.5771, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.17724913999183722, | |
| "grad_norm": 0.4292182704616412, | |
| "learning_rate": 4.847175110480015e-05, | |
| "loss": 0.5708, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.18191359104425397, | |
| "grad_norm": 0.450661348267434, | |
| "learning_rate": 4.8343362241343944e-05, | |
| "loss": 0.5708, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.18657804209667075, | |
| "grad_norm": 0.3869287419256418, | |
| "learning_rate": 4.8210001081698954e-05, | |
| "loss": 0.5708, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.1912424931490875, | |
| "grad_norm": 0.42553333511533853, | |
| "learning_rate": 4.8071699439887215e-05, | |
| "loss": 0.5845, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.19590694420150429, | |
| "grad_norm": 0.4146813048215204, | |
| "learning_rate": 4.792849030850938e-05, | |
| "loss": 0.5762, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.20057139525392106, | |
| "grad_norm": 0.5810537942195573, | |
| "learning_rate": 4.778040785087412e-05, | |
| "loss": 0.5736, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.20523584630633782, | |
| "grad_norm": 0.4540901088901895, | |
| "learning_rate": 4.762748739284831e-05, | |
| "loss": 0.5695, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.2099002973587546, | |
| "grad_norm": 0.39778098326448963, | |
| "learning_rate": 4.746976541442986e-05, | |
| "loss": 0.562, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.21456474841117135, | |
| "grad_norm": 0.45060914716273687, | |
| "learning_rate": 4.730727954104515e-05, | |
| "loss": 0.5708, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.21922919946358813, | |
| "grad_norm": 0.40075705306410814, | |
| "learning_rate": 4.714006853457339e-05, | |
| "loss": 0.5779, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.2238936505160049, | |
| "grad_norm": 0.3988838422959827, | |
| "learning_rate": 4.6968172284099654e-05, | |
| "loss": 0.5769, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.22855810156842166, | |
| "grad_norm": 0.40730912286641724, | |
| "learning_rate": 4.679163179639923e-05, | |
| "loss": 0.5622, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.23322255262083844, | |
| "grad_norm": 0.41570250249981516, | |
| "learning_rate": 4.661048918615513e-05, | |
| "loss": 0.5626, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.23788700367325522, | |
| "grad_norm": 0.37266061168544923, | |
| "learning_rate": 4.642478766591148e-05, | |
| "loss": 0.5603, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.24255145472567197, | |
| "grad_norm": 0.41204417027814066, | |
| "learning_rate": 4.623457153576493e-05, | |
| "loss": 0.5692, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.24721590577808875, | |
| "grad_norm": 0.3517500791021223, | |
| "learning_rate": 4.603988617279655e-05, | |
| "loss": 0.5575, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.2518803568305055, | |
| "grad_norm": 0.3626203299511348, | |
| "learning_rate": 4.5840778020247025e-05, | |
| "loss": 0.5561, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.2565448078829223, | |
| "grad_norm": 0.4323865340964901, | |
| "learning_rate": 4.56372945764372e-05, | |
| "loss": 0.5605, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.26120925893533903, | |
| "grad_norm": 0.4558769125407336, | |
| "learning_rate": 4.542948438343725e-05, | |
| "loss": 0.564, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.26587370998775584, | |
| "grad_norm": 0.3788810919577419, | |
| "learning_rate": 4.521739701548662e-05, | |
| "loss": 0.5689, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.2705381610401726, | |
| "grad_norm": 0.3494299182914199, | |
| "learning_rate": 4.500108306716784e-05, | |
| "loss": 0.5532, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.27520261209258934, | |
| "grad_norm": 0.3243682374762204, | |
| "learning_rate": 4.478059414133695e-05, | |
| "loss": 0.5472, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.27986706314500615, | |
| "grad_norm": 0.3774629263697534, | |
| "learning_rate": 4.455598283681331e-05, | |
| "loss": 0.5631, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.2845315141974229, | |
| "grad_norm": 0.3590842486979046, | |
| "learning_rate": 4.43273027358319e-05, | |
| "loss": 0.5473, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.28919596524983965, | |
| "grad_norm": 0.33869526982866255, | |
| "learning_rate": 4.4094608391260996e-05, | |
| "loss": 0.5556, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.29386041630225646, | |
| "grad_norm": 0.3740834437948184, | |
| "learning_rate": 4.3857955313588256e-05, | |
| "loss": 0.5586, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.2985248673546732, | |
| "grad_norm": 0.4069053631763161, | |
| "learning_rate": 4.3617399957678426e-05, | |
| "loss": 0.5699, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.30318931840708996, | |
| "grad_norm": 0.42775595832918634, | |
| "learning_rate": 4.3372999709305726e-05, | |
| "loss": 0.5541, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.3078537694595067, | |
| "grad_norm": 0.4225047260621452, | |
| "learning_rate": 4.3124812871464146e-05, | |
| "loss": 0.5612, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.3125182205119235, | |
| "grad_norm": 0.4064532119864525, | |
| "learning_rate": 4.287289865045895e-05, | |
| "loss": 0.5566, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.31718267156434027, | |
| "grad_norm": 0.40813784397443764, | |
| "learning_rate": 4.261731714178274e-05, | |
| "loss": 0.5427, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.321847122616757, | |
| "grad_norm": 0.37505518830201817, | |
| "learning_rate": 4.23581293157793e-05, | |
| "loss": 0.5573, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.3265115736691738, | |
| "grad_norm": 0.40656917887904453, | |
| "learning_rate": 4.20953970030988e-05, | |
| "loss": 0.5531, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.3311760247215906, | |
| "grad_norm": 0.39600365804673093, | |
| "learning_rate": 4.182918287994781e-05, | |
| "loss": 0.5475, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.33584047577400733, | |
| "grad_norm": 0.3703296359847103, | |
| "learning_rate": 4.155955045313748e-05, | |
| "loss": 0.5651, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.34050492682642414, | |
| "grad_norm": 0.3666872032980013, | |
| "learning_rate": 4.128656404493371e-05, | |
| "loss": 0.559, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.3451693778788409, | |
| "grad_norm": 0.33550827998787563, | |
| "learning_rate": 4.10102887777127e-05, | |
| "loss": 0.5609, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.34983382893125764, | |
| "grad_norm": 0.3730387291098042, | |
| "learning_rate": 4.073079055842566e-05, | |
| "loss": 0.5485, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.35449827998367445, | |
| "grad_norm": 0.3970093889085142, | |
| "learning_rate": 4.044813606287634e-05, | |
| "loss": 0.5475, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.3591627310360912, | |
| "grad_norm": 0.32485469708579506, | |
| "learning_rate": 4.016239271981519e-05, | |
| "loss": 0.5461, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.36382718208850795, | |
| "grad_norm": 0.3382837402395568, | |
| "learning_rate": 3.987362869485384e-05, | |
| "loss": 0.5496, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.3684916331409247, | |
| "grad_norm": 0.3609893182037058, | |
| "learning_rate": 3.9581912874203854e-05, | |
| "loss": 0.5561, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.3731560841933415, | |
| "grad_norm": 0.35168703173333654, | |
| "learning_rate": 3.928731484824351e-05, | |
| "loss": 0.5479, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.37782053524575826, | |
| "grad_norm": 0.3629018488370036, | |
| "learning_rate": 3.898990489491668e-05, | |
| "loss": 0.5501, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.382484986298175, | |
| "grad_norm": 0.37390402781019416, | |
| "learning_rate": 3.8689753962967636e-05, | |
| "loss": 0.5354, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.3871494373505918, | |
| "grad_norm": 0.335380363177113, | |
| "learning_rate": 3.838693365501586e-05, | |
| "loss": 0.5469, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.39181388840300857, | |
| "grad_norm": 0.3424832891358978, | |
| "learning_rate": 3.80815162104748e-05, | |
| "loss": 0.552, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.3964783394554253, | |
| "grad_norm": 0.37221668230659216, | |
| "learning_rate": 3.7773574488318854e-05, | |
| "loss": 0.5553, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.40114279050784213, | |
| "grad_norm": 0.37074728239538524, | |
| "learning_rate": 3.746318194970239e-05, | |
| "loss": 0.5523, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.4058072415602589, | |
| "grad_norm": 0.38839870040744245, | |
| "learning_rate": 3.715041264043525e-05, | |
| "loss": 0.556, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.41047169261267563, | |
| "grad_norm": 0.3575823882886992, | |
| "learning_rate": 3.683534117331869e-05, | |
| "loss": 0.5485, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.41513614366509244, | |
| "grad_norm": 0.3161641377206891, | |
| "learning_rate": 3.65180427103461e-05, | |
| "loss": 0.5467, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.4198005947175092, | |
| "grad_norm": 0.3703781300095459, | |
| "learning_rate": 3.619859294477273e-05, | |
| "loss": 0.5492, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.42446504576992594, | |
| "grad_norm": 0.3492760299630012, | |
| "learning_rate": 3.587706808305861e-05, | |
| "loss": 0.5421, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.4291294968223427, | |
| "grad_norm": 0.3267136941264168, | |
| "learning_rate": 3.5553544826689145e-05, | |
| "loss": 0.538, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.4337939478747595, | |
| "grad_norm": 0.29422503748430795, | |
| "learning_rate": 3.522810035387752e-05, | |
| "loss": 0.5288, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.43845839892717625, | |
| "grad_norm": 0.34737447664291465, | |
| "learning_rate": 3.490081230115343e-05, | |
| "loss": 0.5525, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.443122849979593, | |
| "grad_norm": 0.3404046261795078, | |
| "learning_rate": 3.4571758744842507e-05, | |
| "loss": 0.5522, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.4477873010320098, | |
| "grad_norm": 0.36447616317715154, | |
| "learning_rate": 3.4241018182440735e-05, | |
| "loss": 0.5441, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.45245175208442656, | |
| "grad_norm": 0.31157047733964094, | |
| "learning_rate": 3.390866951388847e-05, | |
| "loss": 0.5391, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.4571162031368433, | |
| "grad_norm": 0.3116582428424639, | |
| "learning_rate": 3.3574792022748466e-05, | |
| "loss": 0.5535, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.4617806541892601, | |
| "grad_norm": 0.3279027907069977, | |
| "learning_rate": 3.3239465357292304e-05, | |
| "loss": 0.5376, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.46644510524167687, | |
| "grad_norm": 0.3484015010403029, | |
| "learning_rate": 3.290276951149992e-05, | |
| "loss": 0.5336, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.4711095562940936, | |
| "grad_norm": 0.33469705746842926, | |
| "learning_rate": 3.256478480597656e-05, | |
| "loss": 0.546, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.47577400734651043, | |
| "grad_norm": 0.30392678850247695, | |
| "learning_rate": 3.222559186879191e-05, | |
| "loss": 0.5407, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.4804384583989272, | |
| "grad_norm": 0.29705201467826636, | |
| "learning_rate": 3.18852716162458e-05, | |
| "loss": 0.5395, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.48510290945134393, | |
| "grad_norm": 0.31309471816872475, | |
| "learning_rate": 3.154390523356523e-05, | |
| "loss": 0.5309, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.48976736050376074, | |
| "grad_norm": 0.3532094930450357, | |
| "learning_rate": 3.1201574155537155e-05, | |
| "loss": 0.5341, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.4944318115561775, | |
| "grad_norm": 0.3130231914781531, | |
| "learning_rate": 3.085836004708179e-05, | |
| "loss": 0.5243, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.49909626260859424, | |
| "grad_norm": 0.32746390804605524, | |
| "learning_rate": 3.0514344783771015e-05, | |
| "loss": 0.5458, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.503760713661011, | |
| "grad_norm": 0.3140774516982654, | |
| "learning_rate": 3.0169610432296513e-05, | |
| "loss": 0.542, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.5084251647134278, | |
| "grad_norm": 0.33623222677088005, | |
| "learning_rate": 2.9824239230892316e-05, | |
| "loss": 0.5449, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.5130896157658446, | |
| "grad_norm": 0.34034229987032566, | |
| "learning_rate": 2.9478313569716425e-05, | |
| "loss": 0.5399, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.5177540668182613, | |
| "grad_norm": 0.31316724089217596, | |
| "learning_rate": 2.9131915971196216e-05, | |
| "loss": 0.538, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.5224185178706781, | |
| "grad_norm": 0.3088718959081153, | |
| "learning_rate": 2.8785129070342247e-05, | |
| "loss": 0.5415, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.5270829689230948, | |
| "grad_norm": 0.34309616279067395, | |
| "learning_rate": 2.8438035595035235e-05, | |
| "loss": 0.5323, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.5317474199755117, | |
| "grad_norm": 0.3160243240149518, | |
| "learning_rate": 2.8090718346290902e-05, | |
| "loss": 0.5227, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.5364118710279284, | |
| "grad_norm": 0.30318761428213864, | |
| "learning_rate": 2.77432601785073e-05, | |
| "loss": 0.538, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.5410763220803452, | |
| "grad_norm": 0.33554969124573514, | |
| "learning_rate": 2.7395743979699527e-05, | |
| "loss": 0.5423, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.5457407731327619, | |
| "grad_norm": 0.29358246176347985, | |
| "learning_rate": 2.7048252651726237e-05, | |
| "loss": 0.5302, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.5504052241851787, | |
| "grad_norm": 0.27227963891831986, | |
| "learning_rate": 2.6700869090513025e-05, | |
| "loss": 0.5276, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.5550696752375954, | |
| "grad_norm": 0.3315788476523275, | |
| "learning_rate": 2.6353676166277175e-05, | |
| "loss": 0.5375, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.5597341262900123, | |
| "grad_norm": 0.29167775719810907, | |
| "learning_rate": 2.6006756703758462e-05, | |
| "loss": 0.5287, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.564398577342429, | |
| "grad_norm": 0.2836903485895201, | |
| "learning_rate": 2.5660193462460914e-05, | |
| "loss": 0.5209, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.5690630283948458, | |
| "grad_norm": 0.312902944542489, | |
| "learning_rate": 2.5314069116910073e-05, | |
| "loss": 0.5352, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.5737274794472625, | |
| "grad_norm": 0.3061911987604843, | |
| "learning_rate": 2.496846623693052e-05, | |
| "loss": 0.5418, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.5783919304996793, | |
| "grad_norm": 0.30918289084780787, | |
| "learning_rate": 2.4623467267948453e-05, | |
| "loss": 0.5264, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.583056381552096, | |
| "grad_norm": 0.29432202760005055, | |
| "learning_rate": 2.427915451132382e-05, | |
| "loss": 0.5303, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.5877208326045129, | |
| "grad_norm": 0.30248260299992763, | |
| "learning_rate": 2.3935610104716934e-05, | |
| "loss": 0.5318, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.5923852836569297, | |
| "grad_norm": 0.2921703613190239, | |
| "learning_rate": 2.359291600249407e-05, | |
| "loss": 0.5351, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.5970497347093464, | |
| "grad_norm": 0.2861561331468405, | |
| "learning_rate": 2.325115395617683e-05, | |
| "loss": 0.5191, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.6017141857617632, | |
| "grad_norm": 0.29199262406988685, | |
| "learning_rate": 2.291040549493985e-05, | |
| "loss": 0.5314, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.6063786368141799, | |
| "grad_norm": 0.28022740610521435, | |
| "learning_rate": 2.2570751906161624e-05, | |
| "loss": 0.5189, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.6110430878665967, | |
| "grad_norm": 0.32229685451288487, | |
| "learning_rate": 2.223227421603289e-05, | |
| "loss": 0.5277, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.6157075389190134, | |
| "grad_norm": 0.2812720351153964, | |
| "learning_rate": 2.1895053170227464e-05, | |
| "loss": 0.5315, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.6203719899714303, | |
| "grad_norm": 0.2888622086540738, | |
| "learning_rate": 2.1559169214639884e-05, | |
| "loss": 0.5277, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.625036441023847, | |
| "grad_norm": 0.3120048352568711, | |
| "learning_rate": 2.122470247619464e-05, | |
| "loss": 0.525, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.6297008920762638, | |
| "grad_norm": 0.2818693243090786, | |
| "learning_rate": 2.0891732743731434e-05, | |
| "loss": 0.5322, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.6343653431286805, | |
| "grad_norm": 0.2926059214892839, | |
| "learning_rate": 2.0560339448971146e-05, | |
| "loss": 0.525, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.6390297941810973, | |
| "grad_norm": 0.29268503655464984, | |
| "learning_rate": 2.0230601647566966e-05, | |
| "loss": 0.5247, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.643694245233514, | |
| "grad_norm": 0.2728644227058383, | |
| "learning_rate": 1.9902598000245222e-05, | |
| "loss": 0.5185, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.6483586962859309, | |
| "grad_norm": 0.25962809295866146, | |
| "learning_rate": 1.9576406754040467e-05, | |
| "loss": 0.5228, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.6530231473383477, | |
| "grad_norm": 0.2939794591562482, | |
| "learning_rate": 1.925210572362922e-05, | |
| "loss": 0.5402, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.6576875983907644, | |
| "grad_norm": 0.2675031379318846, | |
| "learning_rate": 1.892977227276685e-05, | |
| "loss": 0.5224, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.6623520494431812, | |
| "grad_norm": 0.33077413707192227, | |
| "learning_rate": 1.8609483295832036e-05, | |
| "loss": 0.5145, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.6670165004955979, | |
| "grad_norm": 0.28316621823190463, | |
| "learning_rate": 1.829131519948323e-05, | |
| "loss": 0.518, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.6716809515480147, | |
| "grad_norm": 0.28091675149417483, | |
| "learning_rate": 1.7975343884431357e-05, | |
| "loss": 0.5204, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.6763454026004314, | |
| "grad_norm": 0.30963978843403667, | |
| "learning_rate": 1.7661644727333403e-05, | |
| "loss": 0.524, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.6810098536528483, | |
| "grad_norm": 0.27818187692784135, | |
| "learning_rate": 1.7350292562810832e-05, | |
| "loss": 0.5202, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.685674304705265, | |
| "grad_norm": 0.28024391562302353, | |
| "learning_rate": 1.704136166559737e-05, | |
| "loss": 0.5216, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.6903387557576818, | |
| "grad_norm": 0.25776425960390403, | |
| "learning_rate": 1.6734925732820454e-05, | |
| "loss": 0.5201, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.6950032068100985, | |
| "grad_norm": 0.25912854344445496, | |
| "learning_rate": 1.6431057866420313e-05, | |
| "loss": 0.5154, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.6996676578625153, | |
| "grad_norm": 0.288843246635576, | |
| "learning_rate": 1.61298305557111e-05, | |
| "loss": 0.5319, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.704332108914932, | |
| "grad_norm": 0.27843803013272117, | |
| "learning_rate": 1.583131566008825e-05, | |
| "loss": 0.5322, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.7089965599673489, | |
| "grad_norm": 0.2724850636300469, | |
| "learning_rate": 1.553558439188594e-05, | |
| "loss": 0.5188, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.7136610110197656, | |
| "grad_norm": 0.27405031166570964, | |
| "learning_rate": 1.5242707299389086e-05, | |
| "loss": 0.5264, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.7183254620721824, | |
| "grad_norm": 0.28279666997673636, | |
| "learning_rate": 1.4952754250003637e-05, | |
| "loss": 0.5296, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.7229899131245991, | |
| "grad_norm": 0.276837438024271, | |
| "learning_rate": 1.4665794413589298e-05, | |
| "loss": 0.5364, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.7276543641770159, | |
| "grad_norm": 0.2724777598286785, | |
| "learning_rate": 1.4381896245958752e-05, | |
| "loss": 0.5158, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.7323188152294327, | |
| "grad_norm": 0.2649273346430747, | |
| "learning_rate": 1.4101127472547084e-05, | |
| "loss": 0.519, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.7369832662818494, | |
| "grad_norm": 0.2699794671741458, | |
| "learning_rate": 1.3823555072255606e-05, | |
| "loss": 0.5276, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.7416477173342663, | |
| "grad_norm": 0.2597781223574146, | |
| "learning_rate": 1.354924526147357e-05, | |
| "loss": 0.5158, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.746312168386683, | |
| "grad_norm": 0.28723301584203204, | |
| "learning_rate": 1.3278263478281994e-05, | |
| "loss": 0.5224, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.7509766194390998, | |
| "grad_norm": 0.2535306464938602, | |
| "learning_rate": 1.3010674366843001e-05, | |
| "loss": 0.5361, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.7556410704915165, | |
| "grad_norm": 0.27848610430564497, | |
| "learning_rate": 1.2746541761978592e-05, | |
| "loss": 0.5148, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.7603055215439333, | |
| "grad_norm": 0.26508085752140736, | |
| "learning_rate": 1.2485928673942568e-05, | |
| "loss": 0.5197, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.76496997259635, | |
| "grad_norm": 0.2765576189082103, | |
| "learning_rate": 1.2228897273389022e-05, | |
| "loss": 0.5043, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.7696344236487669, | |
| "grad_norm": 0.25179624599466677, | |
| "learning_rate": 1.1975508876541262e-05, | |
| "loss": 0.5252, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.7742988747011836, | |
| "grad_norm": 0.2597455495271655, | |
| "learning_rate": 1.1725823930564436e-05, | |
| "loss": 0.5292, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.7789633257536004, | |
| "grad_norm": 0.2525298953468885, | |
| "learning_rate": 1.1479901999145583e-05, | |
| "loss": 0.5194, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.7836277768060171, | |
| "grad_norm": 0.26668419176346814, | |
| "learning_rate": 1.1237801748284375e-05, | |
| "loss": 0.5183, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.7882922278584339, | |
| "grad_norm": 0.2576707752493972, | |
| "learning_rate": 1.099958093229802e-05, | |
| "loss": 0.5145, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.7929566789108506, | |
| "grad_norm": 0.2519900873666899, | |
| "learning_rate": 1.0765296380043684e-05, | |
| "loss": 0.52, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.7976211299632674, | |
| "grad_norm": 0.25029147728099826, | |
| "learning_rate": 1.0535003981361613e-05, | |
| "loss": 0.5085, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.8022855810156843, | |
| "grad_norm": 0.25974689320051614, | |
| "learning_rate": 1.030875867374238e-05, | |
| "loss": 0.5276, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.806950032068101, | |
| "grad_norm": 0.2750183412160561, | |
| "learning_rate": 1.008661442922118e-05, | |
| "loss": 0.5288, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.8116144831205178, | |
| "grad_norm": 0.24684508884873066, | |
| "learning_rate": 9.868624241502573e-06, | |
| "loss": 0.5108, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.8162789341729345, | |
| "grad_norm": 0.24917506535926326, | |
| "learning_rate": 9.654840113318506e-06, | |
| "loss": 0.5153, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.8209433852253513, | |
| "grad_norm": 0.26588122762475325, | |
| "learning_rate": 9.445313044022797e-06, | |
| "loss": 0.5134, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.825607836277768, | |
| "grad_norm": 0.25205075163043134, | |
| "learning_rate": 9.240093017424978e-06, | |
| "loss": 0.5234, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.8302722873301849, | |
| "grad_norm": 0.24453056572204862, | |
| "learning_rate": 9.039228989866358e-06, | |
| "loss": 0.5256, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.8349367383826016, | |
| "grad_norm": 0.25964646851376255, | |
| "learning_rate": 8.84276887854126e-06, | |
| "loss": 0.5197, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.8396011894350184, | |
| "grad_norm": 0.2380789074605979, | |
| "learning_rate": 8.650759550066084e-06, | |
| "loss": 0.5226, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.8442656404874351, | |
| "grad_norm": 0.25537829061619166, | |
| "learning_rate": 8.46324680929905e-06, | |
| "loss": 0.5109, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.8489300915398519, | |
| "grad_norm": 0.24326625950869546, | |
| "learning_rate": 8.280275388413186e-06, | |
| "loss": 0.5171, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.8535945425922686, | |
| "grad_norm": 0.283956175304338, | |
| "learning_rate": 8.10188893622523e-06, | |
| "loss": 0.5138, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.8582589936446854, | |
| "grad_norm": 0.26712492993115533, | |
| "learning_rate": 7.928130007782977e-06, | |
| "loss": 0.5347, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.8629234446971022, | |
| "grad_norm": 0.2770179693009444, | |
| "learning_rate": 7.759040054213531e-06, | |
| "loss": 0.5332, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.867587895749519, | |
| "grad_norm": 0.25471359044347464, | |
| "learning_rate": 7.59465941283494e-06, | |
| "loss": 0.5245, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.8722523468019358, | |
| "grad_norm": 0.2474269162576819, | |
| "learning_rate": 7.435027297533474e-06, | |
| "loss": 0.5122, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.8769167978543525, | |
| "grad_norm": 0.25130162397546496, | |
| "learning_rate": 7.2801817894089756e-06, | |
| "loss": 0.5063, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.8815812489067693, | |
| "grad_norm": 0.24881956227908078, | |
| "learning_rate": 7.130159827690404e-06, | |
| "loss": 0.5177, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.886245699959186, | |
| "grad_norm": 0.25545295509666294, | |
| "learning_rate": 6.98499720092375e-06, | |
| "loss": 0.508, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.8909101510116029, | |
| "grad_norm": 0.24814116495940802, | |
| "learning_rate": 6.844728538434536e-06, | |
| "loss": 0.5156, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.8955746020640196, | |
| "grad_norm": 0.24345586049308032, | |
| "learning_rate": 6.709387302066758e-06, | |
| "loss": 0.5192, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.9002390531164364, | |
| "grad_norm": 0.25035521575621933, | |
| "learning_rate": 6.579005778200434e-06, | |
| "loss": 0.5253, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.9049035041688531, | |
| "grad_norm": 0.2580394858408089, | |
| "learning_rate": 6.453615070049447e-06, | |
| "loss": 0.5251, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.9095679552212699, | |
| "grad_norm": 0.23956216998636262, | |
| "learning_rate": 6.3332450902417666e-06, | |
| "loss": 0.5107, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.9142324062736866, | |
| "grad_norm": 0.23764348877193034, | |
| "learning_rate": 6.21792455368361e-06, | |
| "loss": 0.5129, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.9188968573261035, | |
| "grad_norm": 0.2520198832874535, | |
| "learning_rate": 6.1076809707093225e-06, | |
| "loss": 0.5208, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.9235613083785202, | |
| "grad_norm": 0.23664963056881916, | |
| "learning_rate": 6.002540640518684e-06, | |
| "loss": 0.5066, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.928225759430937, | |
| "grad_norm": 0.3088138680489082, | |
| "learning_rate": 5.902528644903055e-06, | |
| "loss": 0.5153, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.9328902104833537, | |
| "grad_norm": 0.24722065847341207, | |
| "learning_rate": 5.807668842262004e-06, | |
| "loss": 0.5169, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.9375546615357705, | |
| "grad_norm": 0.2351634623111495, | |
| "learning_rate": 5.7179838619117394e-06, | |
| "loss": 0.5097, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.9422191125881872, | |
| "grad_norm": 0.2422992129098815, | |
| "learning_rate": 5.633495098686789e-06, | |
| "loss": 0.5119, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.946883563640604, | |
| "grad_norm": 0.2550904076289393, | |
| "learning_rate": 5.554222707836121e-06, | |
| "loss": 0.5147, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.9515480146930209, | |
| "grad_norm": 0.25885529407119634, | |
| "learning_rate": 5.480185600215012e-06, | |
| "loss": 0.5079, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.9562124657454376, | |
| "grad_norm": 0.23792385142576641, | |
| "learning_rate": 5.411401437773773e-06, | |
| "loss": 0.5143, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.9608769167978544, | |
| "grad_norm": 0.24312465281652115, | |
| "learning_rate": 5.347886629344369e-06, | |
| "loss": 0.5107, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.9655413678502711, | |
| "grad_norm": 0.23595847074173298, | |
| "learning_rate": 5.28965632672603e-06, | |
| "loss": 0.5187, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.9702058189026879, | |
| "grad_norm": 0.2479378393066615, | |
| "learning_rate": 5.236724421070693e-06, | |
| "loss": 0.5144, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.9748702699551046, | |
| "grad_norm": 0.24722093987308796, | |
| "learning_rate": 5.189103539569195e-06, | |
| "loss": 0.5215, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.9795347210075215, | |
| "grad_norm": 0.23704918121696106, | |
| "learning_rate": 5.146805042438997e-06, | |
| "loss": 0.516, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.9841991720599382, | |
| "grad_norm": 0.2499713377733996, | |
| "learning_rate": 5.10983902021413e-06, | |
| "loss": 0.5086, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.988863623112355, | |
| "grad_norm": 0.24126794921726105, | |
| "learning_rate": 5.078214291338054e-06, | |
| "loss": 0.5263, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.9935280741647717, | |
| "grad_norm": 0.2575581210389819, | |
| "learning_rate": 5.051938400059965e-06, | |
| "loss": 0.5173, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.9981925252171885, | |
| "grad_norm": 0.24967221419990557, | |
| "learning_rate": 5.031017614635082e-06, | |
| "loss": 0.5177, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.9991254154276719, | |
| "step": 1071, | |
| "total_flos": 486995857768448.0, | |
| "train_loss": 0.5541811234532507, | |
| "train_runtime": 156721.9484, | |
| "train_samples_per_second": 0.219, | |
| "train_steps_per_second": 0.007 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 1071, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 486995857768448.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |