| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 618, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01620745542949757, |
| "grad_norm": 1.2666090726852417, |
| "learning_rate": 1.5384615384615385e-06, |
| "loss": 1.3575, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.03241491085899514, |
| "grad_norm": 0.9747076630592346, |
| "learning_rate": 3.4615384615384617e-06, |
| "loss": 1.3319, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.04862236628849271, |
| "grad_norm": 0.9286755919456482, |
| "learning_rate": 5.384615384615385e-06, |
| "loss": 1.2949, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.06482982171799027, |
| "grad_norm": 0.8217413425445557, |
| "learning_rate": 7.307692307692308e-06, |
| "loss": 1.3087, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.08103727714748785, |
| "grad_norm": 0.5836331844329834, |
| "learning_rate": 9.230769230769232e-06, |
| "loss": 1.3194, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.09724473257698542, |
| "grad_norm": 0.6233932375907898, |
| "learning_rate": 1.1153846153846154e-05, |
| "loss": 1.2975, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.11345218800648298, |
| "grad_norm": 0.5077695250511169, |
| "learning_rate": 1.3076923076923078e-05, |
| "loss": 1.2869, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.12965964343598055, |
| "grad_norm": 0.4717237055301666, |
| "learning_rate": 1.5e-05, |
| "loss": 1.2447, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.1458670988654781, |
| "grad_norm": 0.5701112151145935, |
| "learning_rate": 1.6923076923076924e-05, |
| "loss": 1.2602, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.1620745542949757, |
| "grad_norm": 0.573088526725769, |
| "learning_rate": 1.8846153846153846e-05, |
| "loss": 1.2537, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.17828200972447325, |
| "grad_norm": 0.6250770092010498, |
| "learning_rate": 2.076923076923077e-05, |
| "loss": 1.2161, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.19448946515397084, |
| "grad_norm": 0.4536466896533966, |
| "learning_rate": 2.269230769230769e-05, |
| "loss": 1.1942, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.2106969205834684, |
| "grad_norm": 0.5518207550048828, |
| "learning_rate": 2.4615384615384616e-05, |
| "loss": 1.1764, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.22690437601296595, |
| "grad_norm": 0.49097076058387756, |
| "learning_rate": 2.6538461538461538e-05, |
| "loss": 1.1654, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.24311183144246354, |
| "grad_norm": 0.7466371059417725, |
| "learning_rate": 2.846153846153846e-05, |
| "loss": 1.2078, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.2593192868719611, |
| "grad_norm": 0.5528755187988281, |
| "learning_rate": 2.999996560458015e-05, |
| "loss": 1.0975, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.2755267423014587, |
| "grad_norm": 0.573396623134613, |
| "learning_rate": 2.999876178144779e-05, |
| "loss": 1.1408, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.2917341977309562, |
| "grad_norm": 0.5396841764450073, |
| "learning_rate": 2.9995838345058782e-05, |
| "loss": 1.1354, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.3079416531604538, |
| "grad_norm": 0.6083248257637024, |
| "learning_rate": 2.999119563058612e-05, |
| "loss": 1.1492, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.3241491085899514, |
| "grad_norm": 0.6934502720832825, |
| "learning_rate": 2.9984834170318635e-05, |
| "loss": 1.1186, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.34035656401944897, |
| "grad_norm": 0.6632962822914124, |
| "learning_rate": 2.9976754693599964e-05, |
| "loss": 1.0607, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.3565640194489465, |
| "grad_norm": 0.7568578720092773, |
| "learning_rate": 2.9966958126744923e-05, |
| "loss": 1.1135, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.3727714748784441, |
| "grad_norm": 0.8758991956710815, |
| "learning_rate": 2.9955445592933296e-05, |
| "loss": 1.0556, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.3889789303079417, |
| "grad_norm": 0.7006877064704895, |
| "learning_rate": 2.994221841208111e-05, |
| "loss": 0.9963, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.4051863857374392, |
| "grad_norm": 0.7291510105133057, |
| "learning_rate": 2.9927278100689243e-05, |
| "loss": 1.0383, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.4213938411669368, |
| "grad_norm": 0.6858572959899902, |
| "learning_rate": 2.9910626371669593e-05, |
| "loss": 1.0833, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.4376012965964344, |
| "grad_norm": 0.788310170173645, |
| "learning_rate": 2.9892265134148686e-05, |
| "loss": 0.9907, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.4538087520259319, |
| "grad_norm": 0.6967231631278992, |
| "learning_rate": 2.9872196493248794e-05, |
| "loss": 1.0198, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.4700162074554295, |
| "grad_norm": 0.7951614856719971, |
| "learning_rate": 2.9850422749846577e-05, |
| "loss": 0.9352, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.4862236628849271, |
| "grad_norm": 0.7685369849205017, |
| "learning_rate": 2.9826946400309295e-05, |
| "loss": 0.9808, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.5024311183144247, |
| "grad_norm": 0.8352431654930115, |
| "learning_rate": 2.980177013620858e-05, |
| "loss": 0.9216, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.5186385737439222, |
| "grad_norm": 0.827575147151947, |
| "learning_rate": 2.9774896844011887e-05, |
| "loss": 0.9008, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.5348460291734197, |
| "grad_norm": 0.8291617035865784, |
| "learning_rate": 2.97463296047515e-05, |
| "loss": 0.8954, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.5510534846029174, |
| "grad_norm": 0.7617440223693848, |
| "learning_rate": 2.9716071693671353e-05, |
| "loss": 0.9752, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.5672609400324149, |
| "grad_norm": 0.7175273299217224, |
| "learning_rate": 2.9684126579851468e-05, |
| "loss": 0.9052, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.5834683954619124, |
| "grad_norm": 0.8096367120742798, |
| "learning_rate": 2.9650497925810266e-05, |
| "loss": 0.9176, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.5996758508914101, |
| "grad_norm": 0.91055828332901, |
| "learning_rate": 2.9615189587084628e-05, |
| "loss": 0.925, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.6158833063209076, |
| "grad_norm": 0.9182955026626587, |
| "learning_rate": 2.9578205611787877e-05, |
| "loss": 0.9157, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.6320907617504052, |
| "grad_norm": 0.9966630935668945, |
| "learning_rate": 2.953955024014565e-05, |
| "loss": 0.8842, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.6482982171799028, |
| "grad_norm": 0.7878755927085876, |
| "learning_rate": 2.9499227904009748e-05, |
| "loss": 0.8493, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.6645056726094003, |
| "grad_norm": 0.841929018497467, |
| "learning_rate": 2.945724322635004e-05, |
| "loss": 0.7851, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.6807131280388979, |
| "grad_norm": 0.8280230164527893, |
| "learning_rate": 2.9413601020724435e-05, |
| "loss": 0.8042, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.6969205834683955, |
| "grad_norm": 1.0004469156265259, |
| "learning_rate": 2.9368306290726984e-05, |
| "loss": 0.8014, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.713128038897893, |
| "grad_norm": 1.0178419351577759, |
| "learning_rate": 2.932136422941424e-05, |
| "loss": 0.8118, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.7293354943273906, |
| "grad_norm": 0.9624495506286621, |
| "learning_rate": 2.927278021870987e-05, |
| "loss": 0.8164, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.7455429497568882, |
| "grad_norm": 0.9296135306358337, |
| "learning_rate": 2.92225598287876e-05, |
| "loss": 0.8124, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.7617504051863857, |
| "grad_norm": 0.922747015953064, |
| "learning_rate": 2.9170708817432612e-05, |
| "loss": 0.8082, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.7779578606158833, |
| "grad_norm": 1.0309699773788452, |
| "learning_rate": 2.9117233129381393e-05, |
| "loss": 0.7887, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.7941653160453809, |
| "grad_norm": 0.9075000882148743, |
| "learning_rate": 2.9062138895640185e-05, |
| "loss": 0.7794, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.8103727714748784, |
| "grad_norm": 1.001808524131775, |
| "learning_rate": 2.900543243278206e-05, |
| "loss": 0.7868, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.826580226904376, |
| "grad_norm": 1.077176809310913, |
| "learning_rate": 2.8947120242222706e-05, |
| "loss": 0.7489, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.8427876823338736, |
| "grad_norm": 1.210211992263794, |
| "learning_rate": 2.8887209009475064e-05, |
| "loss": 0.7702, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.8589951377633711, |
| "grad_norm": 0.9452102780342102, |
| "learning_rate": 2.882570560338281e-05, |
| "loss": 0.7382, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.8752025931928687, |
| "grad_norm": 1.0349738597869873, |
| "learning_rate": 2.8762617075332855e-05, |
| "loss": 0.74, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.8914100486223663, |
| "grad_norm": 0.987579882144928, |
| "learning_rate": 2.8697950658446884e-05, |
| "loss": 0.7508, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.9076175040518638, |
| "grad_norm": 0.9751459360122681, |
| "learning_rate": 2.8631713766752097e-05, |
| "loss": 0.7146, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.9238249594813615, |
| "grad_norm": 1.062954306602478, |
| "learning_rate": 2.8563913994331172e-05, |
| "loss": 0.7068, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.940032414910859, |
| "grad_norm": 1.1477030515670776, |
| "learning_rate": 2.8494559114451605e-05, |
| "loss": 0.6545, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.9562398703403565, |
| "grad_norm": 1.0274162292480469, |
| "learning_rate": 2.84236570786745e-05, |
| "loss": 0.6998, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.9724473257698542, |
| "grad_norm": 1.110557198524475, |
| "learning_rate": 2.8351216015942933e-05, |
| "loss": 0.6747, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.9886547811993517, |
| "grad_norm": 1.1557697057724, |
| "learning_rate": 2.827724423164995e-05, |
| "loss": 0.6515, |
| "step": 305 |
| }, |
| { |
| "epoch": 1.0032414910858996, |
| "grad_norm": 1.0226082801818848, |
| "learning_rate": 2.820175020668635e-05, |
| "loss": 0.6719, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.019448946515397, |
| "grad_norm": 1.129378318786621, |
| "learning_rate": 2.812474259646837e-05, |
| "loss": 0.6247, |
| "step": 315 |
| }, |
| { |
| "epoch": 1.0356564019448946, |
| "grad_norm": 1.1172395944595337, |
| "learning_rate": 2.804623022994531e-05, |
| "loss": 0.6146, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.0518638573743921, |
| "grad_norm": 1.1870160102844238, |
| "learning_rate": 2.7966222108587307e-05, |
| "loss": 0.5694, |
| "step": 325 |
| }, |
| { |
| "epoch": 1.0680713128038897, |
| "grad_norm": 1.1029253005981445, |
| "learning_rate": 2.788472740535331e-05, |
| "loss": 0.5751, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.0842787682333874, |
| "grad_norm": 1.449513554573059, |
| "learning_rate": 2.780175546363941e-05, |
| "loss": 0.5374, |
| "step": 335 |
| }, |
| { |
| "epoch": 1.100486223662885, |
| "grad_norm": 1.0343148708343506, |
| "learning_rate": 2.7717315796207576e-05, |
| "loss": 0.5688, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.1166936790923825, |
| "grad_norm": 1.0800855159759521, |
| "learning_rate": 2.7631418084095064e-05, |
| "loss": 0.582, |
| "step": 345 |
| }, |
| { |
| "epoch": 1.13290113452188, |
| "grad_norm": 1.2004995346069336, |
| "learning_rate": 2.7544072175504457e-05, |
| "loss": 0.5486, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.1491085899513775, |
| "grad_norm": 1.1442773342132568, |
| "learning_rate": 2.7455288084674565e-05, |
| "loss": 0.5438, |
| "step": 355 |
| }, |
| { |
| "epoch": 1.1653160453808753, |
| "grad_norm": 1.1458081007003784, |
| "learning_rate": 2.7365075990732285e-05, |
| "loss": 0.5537, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.1815235008103728, |
| "grad_norm": 1.1584341526031494, |
| "learning_rate": 2.727344623652558e-05, |
| "loss": 0.5659, |
| "step": 365 |
| }, |
| { |
| "epoch": 1.1977309562398704, |
| "grad_norm": 1.389176845550537, |
| "learning_rate": 2.7180409327437648e-05, |
| "loss": 0.5922, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.213938411669368, |
| "grad_norm": 1.3196877241134644, |
| "learning_rate": 2.708597593018248e-05, |
| "loss": 0.496, |
| "step": 375 |
| }, |
| { |
| "epoch": 1.2301458670988654, |
| "grad_norm": 1.1960793733596802, |
| "learning_rate": 2.6990156871581938e-05, |
| "loss": 0.5452, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.246353322528363, |
| "grad_norm": 1.1414562463760376, |
| "learning_rate": 2.689296313732442e-05, |
| "loss": 0.5696, |
| "step": 385 |
| }, |
| { |
| "epoch": 1.2625607779578605, |
| "grad_norm": 1.173238754272461, |
| "learning_rate": 2.679440587070538e-05, |
| "loss": 0.4971, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.2787682333873582, |
| "grad_norm": 1.245996117591858, |
| "learning_rate": 2.6694496371349723e-05, |
| "loss": 0.5369, |
| "step": 395 |
| }, |
| { |
| "epoch": 1.2949756888168558, |
| "grad_norm": 1.3267337083816528, |
| "learning_rate": 2.6593246093916307e-05, |
| "loss": 0.4883, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.3111831442463533, |
| "grad_norm": 1.1124933958053589, |
| "learning_rate": 2.649066664678467e-05, |
| "loss": 0.4963, |
| "step": 405 |
| }, |
| { |
| "epoch": 1.3273905996758508, |
| "grad_norm": 1.1307417154312134, |
| "learning_rate": 2.638676979072412e-05, |
| "loss": 0.486, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.3435980551053484, |
| "grad_norm": 1.1167956590652466, |
| "learning_rate": 2.6281567437545347e-05, |
| "loss": 0.5253, |
| "step": 415 |
| }, |
| { |
| "epoch": 1.359805510534846, |
| "grad_norm": 1.1781648397445679, |
| "learning_rate": 2.6175071648734752e-05, |
| "loss": 0.5155, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.3760129659643436, |
| "grad_norm": 1.1858192682266235, |
| "learning_rate": 2.606729463407156e-05, |
| "loss": 0.5228, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.3922204213938412, |
| "grad_norm": 1.1593049764633179, |
| "learning_rate": 2.5958248750228018e-05, |
| "loss": 0.5411, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.4084278768233387, |
| "grad_norm": 1.190211296081543, |
| "learning_rate": 2.5847946499352637e-05, |
| "loss": 0.4975, |
| "step": 435 |
| }, |
| { |
| "epoch": 1.4246353322528362, |
| "grad_norm": 1.0861825942993164, |
| "learning_rate": 2.573640052763686e-05, |
| "loss": 0.517, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.440842787682334, |
| "grad_norm": 1.1397205591201782, |
| "learning_rate": 2.5623623623865152e-05, |
| "loss": 0.4718, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.4570502431118315, |
| "grad_norm": 1.305219054222107, |
| "learning_rate": 2.550962871794877e-05, |
| "loss": 0.4437, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.473257698541329, |
| "grad_norm": 1.154456615447998, |
| "learning_rate": 2.5394428879443333e-05, |
| "loss": 0.4669, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.4894651539708266, |
| "grad_norm": 1.1383739709854126, |
| "learning_rate": 2.5278037316050417e-05, |
| "loss": 0.4817, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.505672609400324, |
| "grad_norm": 1.2615864276885986, |
| "learning_rate": 2.516046737210325e-05, |
| "loss": 0.4006, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.5218800648298219, |
| "grad_norm": 1.257574200630188, |
| "learning_rate": 2.5041732527036817e-05, |
| "loss": 0.429, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.5380875202593192, |
| "grad_norm": 1.2761658430099487, |
| "learning_rate": 2.4921846393842414e-05, |
| "loss": 0.4609, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.554294975688817, |
| "grad_norm": 1.2191444635391235, |
| "learning_rate": 2.480082271750692e-05, |
| "loss": 0.4211, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.5705024311183144, |
| "grad_norm": 1.2483482360839844, |
| "learning_rate": 2.4678675373436938e-05, |
| "loss": 0.4495, |
| "step": 485 |
| }, |
| { |
| "epoch": 1.586709886547812, |
| "grad_norm": 1.2474972009658813, |
| "learning_rate": 2.4555418365867965e-05, |
| "loss": 0.471, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.6029173419773097, |
| "grad_norm": 1.5780339241027832, |
| "learning_rate": 2.443106582625879e-05, |
| "loss": 0.4464, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.619124797406807, |
| "grad_norm": 1.1278367042541504, |
| "learning_rate": 2.430563201167136e-05, |
| "loss": 0.3857, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.6353322528363048, |
| "grad_norm": 1.1629952192306519, |
| "learning_rate": 2.4179131303136146e-05, |
| "loss": 0.4811, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.6515397082658023, |
| "grad_norm": 1.193259596824646, |
| "learning_rate": 2.4051578204003405e-05, |
| "loss": 0.3986, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.6677471636952999, |
| "grad_norm": 1.2777222394943237, |
| "learning_rate": 2.3922987338280326e-05, |
| "loss": 0.3735, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.6839546191247974, |
| "grad_norm": 1.2153775691986084, |
| "learning_rate": 2.3793373448954406e-05, |
| "loss": 0.4203, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.700162074554295, |
| "grad_norm": 1.3885631561279297, |
| "learning_rate": 2.366275139630315e-05, |
| "loss": 0.397, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.7163695299837927, |
| "grad_norm": 1.1484588384628296, |
| "learning_rate": 2.3531136156190335e-05, |
| "loss": 0.387, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.73257698541329, |
| "grad_norm": 1.1026580333709717, |
| "learning_rate": 2.3398542818349042e-05, |
| "loss": 0.3726, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.7487844408427877, |
| "grad_norm": 1.323022484779358, |
| "learning_rate": 2.326498658465158e-05, |
| "loss": 0.424, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.7649918962722853, |
| "grad_norm": 1.2276582717895508, |
| "learning_rate": 2.3130482767366614e-05, |
| "loss": 0.4325, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.7811993517017828, |
| "grad_norm": 1.324051022529602, |
| "learning_rate": 2.299504678740359e-05, |
| "loss": 0.3644, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.7974068071312805, |
| "grad_norm": 1.2084301710128784, |
| "learning_rate": 2.2858694172544733e-05, |
| "loss": 0.3649, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.8136142625607778, |
| "grad_norm": 1.2286015748977661, |
| "learning_rate": 2.2721440555664776e-05, |
| "loss": 0.3838, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.8298217179902756, |
| "grad_norm": 1.2307486534118652, |
| "learning_rate": 2.2583301672938648e-05, |
| "loss": 0.3661, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.8460291734197731, |
| "grad_norm": 1.1322344541549683, |
| "learning_rate": 2.2444293362037317e-05, |
| "loss": 0.3824, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.8622366288492707, |
| "grad_norm": 1.1923706531524658, |
| "learning_rate": 2.2304431560311984e-05, |
| "loss": 0.3626, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.8784440842787682, |
| "grad_norm": 1.1578867435455322, |
| "learning_rate": 2.216373230296689e-05, |
| "loss": 0.4028, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.8946515397082657, |
| "grad_norm": 1.2379870414733887, |
| "learning_rate": 2.2022211721220834e-05, |
| "loss": 0.3252, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.9108589951377635, |
| "grad_norm": 1.1819502115249634, |
| "learning_rate": 2.1879886040457755e-05, |
| "loss": 0.3459, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.9270664505672608, |
| "grad_norm": 1.0207310914993286, |
| "learning_rate": 2.1736771578366472e-05, |
| "loss": 0.3855, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.9432739059967585, |
| "grad_norm": 1.2070367336273193, |
| "learning_rate": 2.1592884743069855e-05, |
| "loss": 0.3465, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.959481361426256, |
| "grad_norm": 1.172386646270752, |
| "learning_rate": 2.1448242031243624e-05, |
| "loss": 0.3583, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.9756888168557536, |
| "grad_norm": 1.183351993560791, |
| "learning_rate": 2.1302860026225027e-05, |
| "loss": 0.3374, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.9918962722852513, |
| "grad_norm": 1.4326958656311035, |
| "learning_rate": 2.1156755396111516e-05, |
| "loss": 0.3243, |
| "step": 615 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 1545, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 2000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8.55238528743768e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|