{ "best_global_step": 120, "best_metric": 1.0, "best_model_checkpoint": "/projects/bffw/darora1/llm_ipc/final_models/mpi_async_n2/checkpoint-120", "epoch": 1.2281952825013713, "eval_steps": 40, "global_step": 280, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00877674163466813, "grad_norm": 45.454673767089844, "learning_rate": 2.0000000000000002e-07, "loss": 2.9332, "step": 2 }, { "epoch": 0.01755348326933626, "grad_norm": 48.028682708740234, "learning_rate": 6.000000000000001e-07, "loss": 2.9542, "step": 4 }, { "epoch": 0.026330224904004388, "grad_norm": 46.46372985839844, "learning_rate": 1.0000000000000002e-06, "loss": 2.9605, "step": 6 }, { "epoch": 0.03510696653867252, "grad_norm": 41.78909683227539, "learning_rate": 1.4000000000000001e-06, "loss": 2.8479, "step": 8 }, { "epoch": 0.04388370817334065, "grad_norm": 38.01380157470703, "learning_rate": 1.8000000000000001e-06, "loss": 2.7259, "step": 10 }, { "epoch": 0.052660449808008776, "grad_norm": 38.49835205078125, "learning_rate": 2.2e-06, "loss": 2.4479, "step": 12 }, { "epoch": 0.061437191442676904, "grad_norm": 25.068660736083984, "learning_rate": 2.6e-06, "loss": 2.0032, "step": 14 }, { "epoch": 0.07021393307734504, "grad_norm": 21.01138687133789, "learning_rate": 3e-06, "loss": 1.4907, "step": 16 }, { "epoch": 0.07899067471201317, "grad_norm": 21.503671646118164, "learning_rate": 3.4000000000000005e-06, "loss": 1.4202, "step": 18 }, { "epoch": 0.0877674163466813, "grad_norm": 17.642776489257812, "learning_rate": 3.8000000000000005e-06, "loss": 0.8484, "step": 20 }, { "epoch": 0.09654415798134942, "grad_norm": 12.313624382019043, "learning_rate": 4.2000000000000004e-06, "loss": 0.6718, "step": 22 }, { "epoch": 0.10532089961601755, "grad_norm": 9.669478416442871, "learning_rate": 4.600000000000001e-06, "loss": 0.5142, "step": 24 }, { "epoch": 0.11409764125068568, "grad_norm": 8.069445610046387, "learning_rate": 5e-06, "loss": 0.3321, "step": 26 }, { "epoch": 0.12287438288535381, "grad_norm": 10.269947052001953, "learning_rate": 5.400000000000001e-06, "loss": 0.195, "step": 28 }, { "epoch": 0.13165112452002195, "grad_norm": 7.683987140655518, "learning_rate": 5.8e-06, "loss": 0.1483, "step": 30 }, { "epoch": 0.14042786615469008, "grad_norm": 4.9717864990234375, "learning_rate": 6.200000000000001e-06, "loss": 0.1094, "step": 32 }, { "epoch": 0.1492046077893582, "grad_norm": 6.056117534637451, "learning_rate": 6.600000000000001e-06, "loss": 0.084, "step": 34 }, { "epoch": 0.15798134942402633, "grad_norm": 7.00730037689209, "learning_rate": 7e-06, "loss": 0.0627, "step": 36 }, { "epoch": 0.16675809105869446, "grad_norm": 5.497600555419922, "learning_rate": 7.4e-06, "loss": 0.0487, "step": 38 }, { "epoch": 0.1755348326933626, "grad_norm": 4.045105457305908, "learning_rate": 7.800000000000002e-06, "loss": 0.0344, "step": 40 }, { "epoch": 0.1755348326933626, "eval_accuracy": 0.9908390662995566, "eval_loss": 0.02551591955125332, "eval_runtime": 26.7939, "eval_samples_per_second": 37.322, "eval_steps_per_second": 18.661, "step": 40 }, { "epoch": 0.18431157432803072, "grad_norm": 3.0727834701538086, "learning_rate": 8.2e-06, "loss": 0.0215, "step": 42 }, { "epoch": 0.19308831596269885, "grad_norm": 1.7004352807998657, "learning_rate": 8.6e-06, "loss": 0.0172, "step": 44 }, { "epoch": 0.20186505759736698, "grad_norm": 31.896760940551758, "learning_rate": 9e-06, "loss": 0.0345, "step": 46 }, { "epoch": 0.2106417992320351, "grad_norm": 4.731893062591553, "learning_rate": 9.4e-06, "loss": 0.0291, "step": 48 }, { "epoch": 0.21941854086670323, "grad_norm": 0.8632926344871521, "learning_rate": 9.800000000000001e-06, "loss": 0.0117, "step": 50 }, { "epoch": 0.22819528250137136, "grad_norm": 1.1372809410095215, "learning_rate": 9.999998993000299e-06, "loss": 0.0096, "step": 52 }, { "epoch": 0.2369720241360395, "grad_norm": 1.35813307762146, "learning_rate": 9.999990937005126e-06, "loss": 0.0074, "step": 54 }, { "epoch": 0.24574876577070762, "grad_norm": 1.8251152038574219, "learning_rate": 9.999974825027756e-06, "loss": 0.0056, "step": 56 }, { "epoch": 0.2545255074053758, "grad_norm": 3.9003660678863525, "learning_rate": 9.999950657094151e-06, "loss": 0.0035, "step": 58 }, { "epoch": 0.2633022490400439, "grad_norm": 0.40562182664871216, "learning_rate": 9.999918433243253e-06, "loss": 0.0023, "step": 60 }, { "epoch": 0.27207899067471203, "grad_norm": 0.4589959383010864, "learning_rate": 9.999878153526974e-06, "loss": 0.0039, "step": 62 }, { "epoch": 0.28085573230938016, "grad_norm": 1.4256044626235962, "learning_rate": 9.99982981801022e-06, "loss": 0.0039, "step": 64 }, { "epoch": 0.2896324739440483, "grad_norm": 1.2097476720809937, "learning_rate": 9.999773426770864e-06, "loss": 0.0008, "step": 66 }, { "epoch": 0.2984092155787164, "grad_norm": 1.3794668912887573, "learning_rate": 9.999708979899769e-06, "loss": 0.0022, "step": 68 }, { "epoch": 0.30718595721338454, "grad_norm": 0.9129151105880737, "learning_rate": 9.999636477500765e-06, "loss": 0.0008, "step": 70 }, { "epoch": 0.31596269884805267, "grad_norm": 2.035865068435669, "learning_rate": 9.999555919690673e-06, "loss": 0.0024, "step": 72 }, { "epoch": 0.3247394404827208, "grad_norm": 0.33494269847869873, "learning_rate": 9.999467306599285e-06, "loss": 0.0009, "step": 74 }, { "epoch": 0.3335161821173889, "grad_norm": 1.9235092401504517, "learning_rate": 9.999370638369377e-06, "loss": 0.0015, "step": 76 }, { "epoch": 0.34229292375205705, "grad_norm": 0.06841372698545456, "learning_rate": 9.999265915156697e-06, "loss": 0.0005, "step": 78 }, { "epoch": 0.3510696653867252, "grad_norm": 3.6089587211608887, "learning_rate": 9.999153137129978e-06, "loss": 0.0013, "step": 80 }, { "epoch": 0.3510696653867252, "eval_accuracy": 0.9955795537921697, "eval_loss": 0.033373381942510605, "eval_runtime": 27.5485, "eval_samples_per_second": 36.3, "eval_steps_per_second": 18.15, "step": 80 }, { "epoch": 0.3598464070213933, "grad_norm": 1.6794105768203735, "learning_rate": 9.999032304470926e-06, "loss": 0.0157, "step": 82 }, { "epoch": 0.36862314865606144, "grad_norm": 4.490398406982422, "learning_rate": 9.998903417374228e-06, "loss": 0.0021, "step": 84 }, { "epoch": 0.37739989029072957, "grad_norm": 0.12031521648168564, "learning_rate": 9.998766476047546e-06, "loss": 0.0003, "step": 86 }, { "epoch": 0.3861766319253977, "grad_norm": 0.055538520216941833, "learning_rate": 9.998621480711522e-06, "loss": 0.0014, "step": 88 }, { "epoch": 0.3949533735600658, "grad_norm": 0.12411190569400787, "learning_rate": 9.998468431599768e-06, "loss": 0.0009, "step": 90 }, { "epoch": 0.40373011519473395, "grad_norm": 0.2620101869106293, "learning_rate": 9.99830732895888e-06, "loss": 0.0002, "step": 92 }, { "epoch": 0.4125068568294021, "grad_norm": 0.02486434578895569, "learning_rate": 9.998138173048424e-06, "loss": 0.0003, "step": 94 }, { "epoch": 0.4212835984640702, "grad_norm": 0.048864614218473434, "learning_rate": 9.997960964140946e-06, "loss": 0.0001, "step": 96 }, { "epoch": 0.43006034009873834, "grad_norm": 0.12352154403924942, "learning_rate": 9.997775702521965e-06, "loss": 0.0002, "step": 98 }, { "epoch": 0.43883708173340646, "grad_norm": 0.02925235591828823, "learning_rate": 9.997582388489975e-06, "loss": 0.0001, "step": 100 }, { "epoch": 0.4476138233680746, "grad_norm": 0.007072501815855503, "learning_rate": 9.99738102235644e-06, "loss": 0.0, "step": 102 }, { "epoch": 0.4563905650027427, "grad_norm": 0.018334778025746346, "learning_rate": 9.997171604445803e-06, "loss": 0.0, "step": 104 }, { "epoch": 0.46516730663741085, "grad_norm": 0.017481721937656403, "learning_rate": 9.99695413509548e-06, "loss": 0.0, "step": 106 }, { "epoch": 0.473944048272079, "grad_norm": 0.02275106869637966, "learning_rate": 9.996728614655854e-06, "loss": 0.0, "step": 108 }, { "epoch": 0.4827207899067471, "grad_norm": 0.006805419921875, "learning_rate": 9.996495043490285e-06, "loss": 0.0, "step": 110 }, { "epoch": 0.49149753154141523, "grad_norm": 0.012757470831274986, "learning_rate": 9.996253421975103e-06, "loss": 0.0, "step": 112 }, { "epoch": 0.5002742731760834, "grad_norm": 0.014855819754302502, "learning_rate": 9.996003750499608e-06, "loss": 0.0, "step": 114 }, { "epoch": 0.5090510148107515, "grad_norm": 0.001869674539193511, "learning_rate": 9.995746029466071e-06, "loss": 0.0, "step": 116 }, { "epoch": 0.5178277564454197, "grad_norm": 0.001857027062214911, "learning_rate": 9.995480259289731e-06, "loss": 0.0, "step": 118 }, { "epoch": 0.5266044980800878, "grad_norm": 0.0015473720850422978, "learning_rate": 9.995206440398798e-06, "loss": 0.0, "step": 120 }, { "epoch": 0.5266044980800878, "eval_accuracy": 1.0, "eval_loss": 9.006173968373332e-06, "eval_runtime": 27.2815, "eval_samples_per_second": 36.655, "eval_steps_per_second": 18.327, "step": 120 }, { "epoch": 0.5353812397147559, "grad_norm": 0.0015787497395649552, "learning_rate": 9.994924573234448e-06, "loss": 0.0, "step": 122 }, { "epoch": 0.5441579813494241, "grad_norm": 0.0009497110149823129, "learning_rate": 9.994634658250825e-06, "loss": 0.0, "step": 124 }, { "epoch": 0.5529347229840922, "grad_norm": 0.010696510784327984, "learning_rate": 9.994336695915041e-06, "loss": 0.0, "step": 126 }, { "epoch": 0.5617114646187603, "grad_norm": 0.0011760975467041135, "learning_rate": 9.994030686707171e-06, "loss": 0.0, "step": 128 }, { "epoch": 0.5704882062534284, "grad_norm": 0.0009388104663230479, "learning_rate": 9.993716631120259e-06, "loss": 0.0, "step": 130 }, { "epoch": 0.5792649478880966, "grad_norm": 0.0007937882328405976, "learning_rate": 9.993394529660307e-06, "loss": 0.0, "step": 132 }, { "epoch": 0.5880416895227647, "grad_norm": 0.0007054200395941734, "learning_rate": 9.99306438284629e-06, "loss": 0.0, "step": 134 }, { "epoch": 0.5968184311574328, "grad_norm": 0.0007900617201812565, "learning_rate": 9.992726191210139e-06, "loss": 0.0, "step": 136 }, { "epoch": 0.605595172792101, "grad_norm": 0.0006902568857185543, "learning_rate": 9.992379955296745e-06, "loss": 0.0, "step": 138 }, { "epoch": 0.6143719144267691, "grad_norm": 0.0007422619964927435, "learning_rate": 9.992025675663966e-06, "loss": 0.0, "step": 140 }, { "epoch": 0.6231486560614372, "grad_norm": 0.03101375699043274, "learning_rate": 9.991663352882615e-06, "loss": 0.0, "step": 142 }, { "epoch": 0.6319253976961053, "grad_norm": 0.000703384168446064, "learning_rate": 9.991292987536469e-06, "loss": 0.0, "step": 144 }, { "epoch": 0.6407021393307735, "grad_norm": 0.000645720399916172, "learning_rate": 9.990914580222258e-06, "loss": 0.0, "step": 146 }, { "epoch": 0.6494788809654416, "grad_norm": 0.0005880170501768589, "learning_rate": 9.990528131549674e-06, "loss": 0.0, "step": 148 }, { "epoch": 0.6582556226001097, "grad_norm": 0.0005776435136795044, "learning_rate": 9.990133642141359e-06, "loss": 0.0, "step": 150 }, { "epoch": 0.6670323642347779, "grad_norm": 0.0005855935742147267, "learning_rate": 9.989731112632917e-06, "loss": 0.0, "step": 152 }, { "epoch": 0.675809105869446, "grad_norm": 0.0005201378371566534, "learning_rate": 9.989320543672904e-06, "loss": 0.0, "step": 154 }, { "epoch": 0.6845858475041141, "grad_norm": 0.0004889736883342266, "learning_rate": 9.988901935922826e-06, "loss": 0.0, "step": 156 }, { "epoch": 0.6933625891387822, "grad_norm": 0.0014288433594629169, "learning_rate": 9.988475290057145e-06, "loss": 0.0, "step": 158 }, { "epoch": 0.7021393307734504, "grad_norm": 0.000509511970449239, "learning_rate": 9.988040606763272e-06, "loss": 0.0, "step": 160 }, { "epoch": 0.7021393307734504, "eval_accuracy": 1.0, "eval_loss": 4.382986389828147e-06, "eval_runtime": 27.0687, "eval_samples_per_second": 36.943, "eval_steps_per_second": 18.471, "step": 160 }, { "epoch": 0.7109160724081185, "grad_norm": 0.0005291936104185879, "learning_rate": 9.98759788674157e-06, "loss": 0.0, "step": 162 }, { "epoch": 0.7196928140427866, "grad_norm": 0.0011958482209593058, "learning_rate": 9.987147130705347e-06, "loss": 0.0, "step": 164 }, { "epoch": 0.7284695556774547, "grad_norm": 0.000418210169300437, "learning_rate": 9.986688339380863e-06, "loss": 0.0, "step": 166 }, { "epoch": 0.7372462973121229, "grad_norm": 0.00041748053627088666, "learning_rate": 9.98622151350732e-06, "loss": 0.0, "step": 168 }, { "epoch": 0.746023038946791, "grad_norm": 0.0004505082033574581, "learning_rate": 9.985746653836867e-06, "loss": 0.0, "step": 170 }, { "epoch": 0.7547997805814591, "grad_norm": 0.00039788082358427346, "learning_rate": 9.985263761134602e-06, "loss": 0.0, "step": 172 }, { "epoch": 0.7635765222161273, "grad_norm": 0.0004020440683234483, "learning_rate": 9.984772836178559e-06, "loss": 0.0, "step": 174 }, { "epoch": 0.7723532638507954, "grad_norm": 0.0007427522796206176, "learning_rate": 9.984273879759713e-06, "loss": 0.0, "step": 176 }, { "epoch": 0.7811300054854635, "grad_norm": 0.0009939726442098618, "learning_rate": 9.983766892681985e-06, "loss": 0.0, "step": 178 }, { "epoch": 0.7899067471201316, "grad_norm": 0.0003770038892980665, "learning_rate": 9.983251875762234e-06, "loss": 0.0, "step": 180 }, { "epoch": 0.7986834887547998, "grad_norm": 0.0003830124333035201, "learning_rate": 9.982728829830252e-06, "loss": 0.0, "step": 182 }, { "epoch": 0.8074602303894679, "grad_norm": 0.0003769229515455663, "learning_rate": 9.982197755728771e-06, "loss": 0.0, "step": 184 }, { "epoch": 0.816236972024136, "grad_norm": 0.00034873795812018216, "learning_rate": 9.981658654313458e-06, "loss": 0.0, "step": 186 }, { "epoch": 0.8250137136588042, "grad_norm": 0.0006776470108889043, "learning_rate": 9.981111526452912e-06, "loss": 0.0, "step": 188 }, { "epoch": 0.8337904552934723, "grad_norm": 0.0003451018419582397, "learning_rate": 9.980556373028665e-06, "loss": 0.0, "step": 190 }, { "epoch": 0.8425671969281404, "grad_norm": 0.0005146560142748058, "learning_rate": 9.979993194935182e-06, "loss": 0.0, "step": 192 }, { "epoch": 0.8513439385628085, "grad_norm": 0.0003344170399941504, "learning_rate": 9.979421993079853e-06, "loss": 0.0, "step": 194 }, { "epoch": 0.8601206801974767, "grad_norm": 0.00037026862264610827, "learning_rate": 9.978842768382999e-06, "loss": 0.0, "step": 196 }, { "epoch": 0.8688974218321448, "grad_norm": 0.00033089827047660947, "learning_rate": 9.978255521777865e-06, "loss": 0.0, "step": 198 }, { "epoch": 0.8776741634668129, "grad_norm": 0.0003343552234582603, "learning_rate": 9.977660254210623e-06, "loss": 0.0, "step": 200 }, { "epoch": 0.8776741634668129, "eval_accuracy": 1.0, "eval_loss": 3.085134494540398e-06, "eval_runtime": 26.6311, "eval_samples_per_second": 37.55, "eval_steps_per_second": 18.775, "step": 200 }, { "epoch": 0.8864509051014811, "grad_norm": 0.0003209022688679397, "learning_rate": 9.977056966640368e-06, "loss": 0.0, "step": 202 }, { "epoch": 0.8952276467361492, "grad_norm": 0.0003875560942105949, "learning_rate": 9.976445660039118e-06, "loss": 0.0, "step": 204 }, { "epoch": 0.9040043883708173, "grad_norm": 0.000311438663629815, "learning_rate": 9.975826335391808e-06, "loss": 0.0, "step": 206 }, { "epoch": 0.9127811300054854, "grad_norm": 0.00031096808379516006, "learning_rate": 9.975198993696294e-06, "loss": 0.0, "step": 208 }, { "epoch": 0.9215578716401536, "grad_norm": 0.0005075507797300816, "learning_rate": 9.974563635963348e-06, "loss": 0.0, "step": 210 }, { "epoch": 0.9303346132748217, "grad_norm": 0.00029596491367556155, "learning_rate": 9.973920263216658e-06, "loss": 0.0, "step": 212 }, { "epoch": 0.9391113549094898, "grad_norm": 0.000583572022151202, "learning_rate": 9.973268876492827e-06, "loss": 0.0, "step": 214 }, { "epoch": 0.947888096544158, "grad_norm": 0.00031335651874542236, "learning_rate": 9.972609476841368e-06, "loss": 0.0, "step": 216 }, { "epoch": 0.9566648381788261, "grad_norm": 0.0003140955523122102, "learning_rate": 9.971942065324704e-06, "loss": 0.0, "step": 218 }, { "epoch": 0.9654415798134942, "grad_norm": 0.00029426993569359183, "learning_rate": 9.971266643018171e-06, "loss": 0.0, "step": 220 }, { "epoch": 0.9742183214481623, "grad_norm": 0.0005635425914078951, "learning_rate": 9.970583211010008e-06, "loss": 0.0, "step": 222 }, { "epoch": 0.9829950630828305, "grad_norm": 0.0002683993661776185, "learning_rate": 9.969891770401358e-06, "loss": 0.0, "step": 224 }, { "epoch": 0.9917718047174986, "grad_norm": 0.00028738679247908294, "learning_rate": 9.969192322306271e-06, "loss": 0.0, "step": 226 }, { "epoch": 1.0, "grad_norm": 0.00046732599730603397, "learning_rate": 9.968484867851698e-06, "loss": 0.0, "step": 228 }, { "epoch": 1.0087767416346682, "grad_norm": 0.0002722733770497143, "learning_rate": 9.96776940817749e-06, "loss": 0.0, "step": 230 }, { "epoch": 1.0175534832693363, "grad_norm": 0.00028377954731695354, "learning_rate": 9.967045944436392e-06, "loss": 0.0, "step": 232 }, { "epoch": 1.0263302249040045, "grad_norm": 0.0002573099918663502, "learning_rate": 9.966314477794052e-06, "loss": 0.0, "step": 234 }, { "epoch": 1.0351069665386725, "grad_norm": 0.0004235700471326709, "learning_rate": 9.965575009429006e-06, "loss": 0.0, "step": 236 }, { "epoch": 1.0438837081733408, "grad_norm": 0.0007151885074563324, "learning_rate": 9.964827540532685e-06, "loss": 0.0, "step": 238 }, { "epoch": 1.0526604498080088, "grad_norm": 0.0002589399227872491, "learning_rate": 9.964072072309412e-06, "loss": 0.0, "step": 240 }, { "epoch": 1.0526604498080088, "eval_accuracy": 1.0, "eval_loss": 2.4207340629800456e-06, "eval_runtime": 29.8042, "eval_samples_per_second": 33.552, "eval_steps_per_second": 16.776, "step": 240 }, { "epoch": 1.061437191442677, "grad_norm": 0.00024218352336902171, "learning_rate": 9.963308605976397e-06, "loss": 0.0, "step": 242 }, { "epoch": 1.070213933077345, "grad_norm": 0.0003983522765338421, "learning_rate": 9.962537142763733e-06, "loss": 0.0, "step": 244 }, { "epoch": 1.0789906747120133, "grad_norm": 0.00023251764650922269, "learning_rate": 9.961757683914406e-06, "loss": 0.0, "step": 246 }, { "epoch": 1.0877674163466813, "grad_norm": 0.0002750585845205933, "learning_rate": 9.960970230684276e-06, "loss": 0.0, "step": 248 }, { "epoch": 1.0965441579813495, "grad_norm": 0.0002548044722061604, "learning_rate": 9.96017478434209e-06, "loss": 0.0, "step": 250 }, { "epoch": 1.1053208996160175, "grad_norm": 0.00024573353584855795, "learning_rate": 9.959371346169466e-06, "loss": 0.0, "step": 252 }, { "epoch": 1.1140976412506858, "grad_norm": 0.0003012689994648099, "learning_rate": 9.958559917460909e-06, "loss": 0.0, "step": 254 }, { "epoch": 1.1228743828853538, "grad_norm": 0.00022236473159864545, "learning_rate": 9.957740499523787e-06, "loss": 0.0, "step": 256 }, { "epoch": 1.131651124520022, "grad_norm": 0.00022309472842607647, "learning_rate": 9.95691309367835e-06, "loss": 0.0, "step": 258 }, { "epoch": 1.14042786615469, "grad_norm": 0.00036199891474097967, "learning_rate": 9.95607770125771e-06, "loss": 0.0, "step": 260 }, { "epoch": 1.1492046077893583, "grad_norm": 0.0002264819631818682, "learning_rate": 9.955234323607854e-06, "loss": 0.0, "step": 262 }, { "epoch": 1.1579813494240263, "grad_norm": 0.0002222947805421427, "learning_rate": 9.954382962087628e-06, "loss": 0.0, "step": 264 }, { "epoch": 1.1667580910586945, "grad_norm": 0.0002369284484302625, "learning_rate": 9.95352361806875e-06, "loss": 0.0, "step": 266 }, { "epoch": 1.1755348326933626, "grad_norm": 0.0002466253063175827, "learning_rate": 9.95265629293579e-06, "loss": 0.0, "step": 268 }, { "epoch": 1.1843115743280308, "grad_norm": 0.0002238057932117954, "learning_rate": 9.951780988086183e-06, "loss": 0.0, "step": 270 }, { "epoch": 1.1930883159626988, "grad_norm": 0.00020709357340820134, "learning_rate": 9.950897704930223e-06, "loss": 0.0, "step": 272 }, { "epoch": 1.201865057597367, "grad_norm": 0.000194857973838225, "learning_rate": 9.95000644489105e-06, "loss": 0.0, "step": 274 }, { "epoch": 1.210641799232035, "grad_norm": 0.00019957115000579506, "learning_rate": 9.949107209404664e-06, "loss": 0.0, "step": 276 }, { "epoch": 1.2194185408667033, "grad_norm": 0.00020521250553429127, "learning_rate": 9.948199999919914e-06, "loss": 0.0, "step": 278 }, { "epoch": 1.2281952825013713, "grad_norm": 0.0002061008126474917, "learning_rate": 9.947284817898493e-06, "loss": 0.0, "step": 280 }, { "epoch": 1.2281952825013713, "eval_accuracy": 1.0, "eval_loss": 1.949304987647338e-06, "eval_runtime": 29.4785, "eval_samples_per_second": 33.923, "eval_steps_per_second": 16.962, "step": 280 } ], "logging_steps": 2, "max_steps": 5000, "num_input_tokens_seen": 0, "num_train_epochs": 22, "save_steps": 40, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.0478480881811456e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }