| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 54000, | |
| "global_step": 14319, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.028284098051539912, | |
| "grad_norm": 0.04601588801994645, | |
| "learning_rate": 9.42737430167598e-06, | |
| "loss": 0.1706, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.056568196103079824, | |
| "grad_norm": 0.08599903987598387, | |
| "learning_rate": 1.885474860335196e-05, | |
| "loss": 0.1407, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.08485229415461974, | |
| "grad_norm": 0.07811249522270598, | |
| "learning_rate": 2.8282122905027936e-05, | |
| "loss": 0.133, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.11313639220615965, | |
| "grad_norm": 0.08106903455078629, | |
| "learning_rate": 3.770949720670392e-05, | |
| "loss": 0.1302, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.14142049025769957, | |
| "grad_norm": 0.07762084811197388, | |
| "learning_rate": 4.713687150837989e-05, | |
| "loss": 0.128, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.16970458830923948, | |
| "grad_norm": 0.06702784826347409, | |
| "learning_rate": 5.656424581005587e-05, | |
| "loss": 0.1269, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.19798868636077938, | |
| "grad_norm": 0.06832201155426712, | |
| "learning_rate": 6.599162011173185e-05, | |
| "loss": 0.1258, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.2262727844123193, | |
| "grad_norm": 0.07124429027326014, | |
| "learning_rate": 7.541899441340783e-05, | |
| "loss": 0.1251, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.2545568824638592, | |
| "grad_norm": 0.059836090071897996, | |
| "learning_rate": 8.48463687150838e-05, | |
| "loss": 0.125, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 0.28284098051539913, | |
| "grad_norm": 0.06065917805260472, | |
| "learning_rate": 9.427374301675978e-05, | |
| "loss": 0.1244, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.31112507856693905, | |
| "grad_norm": 0.05448515282733843, | |
| "learning_rate": 9.999582667896216e-05, | |
| "loss": 0.1241, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 0.33940917661847897, | |
| "grad_norm": 0.05195941534715265, | |
| "learning_rate": 9.994749800860066e-05, | |
| "loss": 0.1236, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.36769327467001883, | |
| "grad_norm": 0.0533796560627076, | |
| "learning_rate": 9.984507669983246e-05, | |
| "loss": 0.123, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 0.39597737272155875, | |
| "grad_norm": 0.05673889785368016, | |
| "learning_rate": 9.968867367390571e-05, | |
| "loss": 0.1222, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.42426147077309867, | |
| "grad_norm": 0.052072201102599504, | |
| "learning_rate": 9.947845831372577e-05, | |
| "loss": 0.1217, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 0.4525455688246386, | |
| "grad_norm": 0.054891014733737455, | |
| "learning_rate": 9.921465828041518e-05, | |
| "loss": 0.1218, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.4808296668761785, | |
| "grad_norm": 0.05168215652940432, | |
| "learning_rate": 9.889755926675904e-05, | |
| "loss": 0.121, | |
| "step": 2295 | |
| }, | |
| { | |
| "epoch": 0.5091137649277184, | |
| "grad_norm": 0.051281710130397264, | |
| "learning_rate": 9.85275046878025e-05, | |
| "loss": 0.121, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.5373978629792583, | |
| "grad_norm": 0.05193059833614012, | |
| "learning_rate": 9.810489530893578e-05, | |
| "loss": 0.1205, | |
| "step": 2565 | |
| }, | |
| { | |
| "epoch": 0.5656819610307983, | |
| "grad_norm": 0.052032831215777654, | |
| "learning_rate": 9.763018881186927e-05, | |
| "loss": 0.1195, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.5939660590823381, | |
| "grad_norm": 0.055670900157506434, | |
| "learning_rate": 9.710389929896887e-05, | |
| "loss": 0.1197, | |
| "step": 2835 | |
| }, | |
| { | |
| "epoch": 0.6222501571338781, | |
| "grad_norm": 0.05191679567872077, | |
| "learning_rate": 9.652659673648816e-05, | |
| "loss": 0.1191, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.650534255185418, | |
| "grad_norm": 0.053751440231836235, | |
| "learning_rate": 9.589890633730087e-05, | |
| "loss": 0.1185, | |
| "step": 3105 | |
| }, | |
| { | |
| "epoch": 0.6788183532369579, | |
| "grad_norm": 0.05742218717400969, | |
| "learning_rate": 9.522150788380149e-05, | |
| "loss": 0.1181, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.7071024512884978, | |
| "grad_norm": 0.05641344503893988, | |
| "learning_rate": 9.449513499170775e-05, | |
| "loss": 0.118, | |
| "step": 3375 | |
| }, | |
| { | |
| "epoch": 0.7353865493400377, | |
| "grad_norm": 0.05184155469686776, | |
| "learning_rate": 9.372057431556227e-05, | |
| "loss": 0.1177, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.7636706473915776, | |
| "grad_norm": 0.04878199937516276, | |
| "learning_rate": 9.289866469679355e-05, | |
| "loss": 0.1175, | |
| "step": 3645 | |
| }, | |
| { | |
| "epoch": 0.7919547454431175, | |
| "grad_norm": 0.05310159239349626, | |
| "learning_rate": 9.203029625525912e-05, | |
| "loss": 0.1169, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.8202388434946575, | |
| "grad_norm": 0.0534469001920457, | |
| "learning_rate": 9.111640942525466e-05, | |
| "loss": 0.1175, | |
| "step": 3915 | |
| }, | |
| { | |
| "epoch": 0.8485229415461973, | |
| "grad_norm": 0.05052535987329732, | |
| "learning_rate": 9.015799393703315e-05, | |
| "loss": 0.1169, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.8768070395977373, | |
| "grad_norm": 0.05309924206712465, | |
| "learning_rate": 8.915608774493695e-05, | |
| "loss": 0.1166, | |
| "step": 4185 | |
| }, | |
| { | |
| "epoch": 0.9050911376492772, | |
| "grad_norm": 0.05471927034944372, | |
| "learning_rate": 8.811177590330367e-05, | |
| "loss": 0.1158, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.933375235700817, | |
| "grad_norm": 0.051718680639674705, | |
| "learning_rate": 8.702618939136322e-05, | |
| "loss": 0.1156, | |
| "step": 4455 | |
| }, | |
| { | |
| "epoch": 0.961659333752357, | |
| "grad_norm": 0.055160448975554825, | |
| "learning_rate": 8.590050388839863e-05, | |
| "loss": 0.1155, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 0.9899434318038969, | |
| "grad_norm": 0.05086520628842916, | |
| "learning_rate": 8.473593850049731e-05, | |
| "loss": 0.1155, | |
| "step": 4725 | |
| }, | |
| { | |
| "epoch": 1.0182275298554369, | |
| "grad_norm": 0.05154998381506173, | |
| "learning_rate": 8.353375444027128e-05, | |
| "loss": 0.1066, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 1.0465116279069768, | |
| "grad_norm": 0.05189766725554911, | |
| "learning_rate": 8.22952536609767e-05, | |
| "loss": 0.1021, | |
| "step": 4995 | |
| }, | |
| { | |
| "epoch": 1.0747957259585166, | |
| "grad_norm": 0.052617917188326715, | |
| "learning_rate": 8.102177744651149e-05, | |
| "loss": 0.1024, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 1.1030798240100566, | |
| "grad_norm": 0.05214363158387452, | |
| "learning_rate": 7.971470495881836e-05, | |
| "loss": 0.1025, | |
| "step": 5265 | |
| }, | |
| { | |
| "epoch": 1.1313639220615965, | |
| "grad_norm": 0.054295844912421495, | |
| "learning_rate": 7.837545174426639e-05, | |
| "loss": 0.1023, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 1.1596480201131363, | |
| "grad_norm": 0.05197457231465077, | |
| "learning_rate": 7.700546820062839e-05, | |
| "loss": 0.1025, | |
| "step": 5535 | |
| }, | |
| { | |
| "epoch": 1.1879321181646763, | |
| "grad_norm": 0.056484265602417545, | |
| "learning_rate": 7.560623800631472e-05, | |
| "loss": 0.1023, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 1.2162162162162162, | |
| "grad_norm": 0.052111946846749885, | |
| "learning_rate": 7.417927651356462e-05, | |
| "loss": 0.1024, | |
| "step": 5805 | |
| }, | |
| { | |
| "epoch": 1.2445003142677562, | |
| "grad_norm": 0.05566575920944282, | |
| "learning_rate": 7.272612910733475e-05, | |
| "loss": 0.1017, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 1.2727844123192962, | |
| "grad_norm": 0.05811617199922452, | |
| "learning_rate": 7.124836953166298e-05, | |
| "loss": 0.1019, | |
| "step": 6075 | |
| }, | |
| { | |
| "epoch": 1.301068510370836, | |
| "grad_norm": 0.061671271903986996, | |
| "learning_rate": 6.974759818531935e-05, | |
| "loss": 0.1019, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 1.329352608422376, | |
| "grad_norm": 0.05466121007796382, | |
| "learning_rate": 6.822544038859025e-05, | |
| "loss": 0.1016, | |
| "step": 6345 | |
| }, | |
| { | |
| "epoch": 1.3576367064739157, | |
| "grad_norm": 0.05425557430418602, | |
| "learning_rate": 6.668354462307296e-05, | |
| "loss": 0.1011, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 1.3859208045254556, | |
| "grad_norm": 0.05874672603708157, | |
| "learning_rate": 6.512358074638657e-05, | |
| "loss": 0.1012, | |
| "step": 6615 | |
| }, | |
| { | |
| "epoch": 1.4142049025769956, | |
| "grad_norm": 0.05823531384414933, | |
| "learning_rate": 6.354723818373301e-05, | |
| "loss": 0.1008, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 1.4424890006285356, | |
| "grad_norm": 0.05608262826782312, | |
| "learning_rate": 6.195622409826653e-05, | |
| "loss": 0.1007, | |
| "step": 6885 | |
| }, | |
| { | |
| "epoch": 1.4707730986800756, | |
| "grad_norm": 0.05408687704162592, | |
| "learning_rate": 6.035226154225313e-05, | |
| "loss": 0.1002, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 1.4990571967316153, | |
| "grad_norm": 0.054980789427209784, | |
| "learning_rate": 5.8737087591022275e-05, | |
| "loss": 0.1004, | |
| "step": 7155 | |
| }, | |
| { | |
| "epoch": 1.5273412947831553, | |
| "grad_norm": 0.05745041355343903, | |
| "learning_rate": 5.7112451461731854e-05, | |
| "loss": 0.0999, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 1.555625392834695, | |
| "grad_norm": 0.05803905423764401, | |
| "learning_rate": 5.5480112618983404e-05, | |
| "loss": 0.0995, | |
| "step": 7425 | |
| }, | |
| { | |
| "epoch": 1.583909490886235, | |
| "grad_norm": 0.056971104280436516, | |
| "learning_rate": 5.384183886933983e-05, | |
| "loss": 0.0997, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 1.612193588937775, | |
| "grad_norm": 0.056543402577003486, | |
| "learning_rate": 5.2199404446808475e-05, | |
| "loss": 0.0988, | |
| "step": 7695 | |
| }, | |
| { | |
| "epoch": 1.640477686989315, | |
| "grad_norm": 0.05414144319537392, | |
| "learning_rate": 5.0554588091363683e-05, | |
| "loss": 0.0988, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 1.668761785040855, | |
| "grad_norm": 0.05756652870031753, | |
| "learning_rate": 4.890917112258916e-05, | |
| "loss": 0.0988, | |
| "step": 7965 | |
| }, | |
| { | |
| "epoch": 1.6970458830923947, | |
| "grad_norm": 0.054317396460825465, | |
| "learning_rate": 4.726493551052682e-05, | |
| "loss": 0.0985, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 1.7253299811439347, | |
| "grad_norm": 0.05780938044176143, | |
| "learning_rate": 4.562366194582113e-05, | |
| "loss": 0.0979, | |
| "step": 8235 | |
| }, | |
| { | |
| "epoch": 1.7536140791954744, | |
| "grad_norm": 0.05615442700243257, | |
| "learning_rate": 4.398712791124905e-05, | |
| "loss": 0.0976, | |
| "step": 8370 | |
| }, | |
| { | |
| "epoch": 1.7818981772470144, | |
| "grad_norm": 0.0550653325962579, | |
| "learning_rate": 4.235710575672401e-05, | |
| "loss": 0.0975, | |
| "step": 8505 | |
| }, | |
| { | |
| "epoch": 1.8101822752985544, | |
| "grad_norm": 0.055514099512198385, | |
| "learning_rate": 4.073536077985884e-05, | |
| "loss": 0.0974, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 1.8384663733500943, | |
| "grad_norm": 0.05542114420833896, | |
| "learning_rate": 3.9123649314166065e-05, | |
| "loss": 0.0968, | |
| "step": 8775 | |
| }, | |
| { | |
| "epoch": 1.8667504714016343, | |
| "grad_norm": 0.05466973411282308, | |
| "learning_rate": 3.752371682696652e-05, | |
| "loss": 0.0966, | |
| "step": 8910 | |
| }, | |
| { | |
| "epoch": 1.895034569453174, | |
| "grad_norm": 0.05615670182195563, | |
| "learning_rate": 3.5937296029065625e-05, | |
| "loss": 0.0967, | |
| "step": 9045 | |
| }, | |
| { | |
| "epoch": 1.923318667504714, | |
| "grad_norm": 0.05525832187066413, | |
| "learning_rate": 3.4366104998245154e-05, | |
| "loss": 0.096, | |
| "step": 9180 | |
| }, | |
| { | |
| "epoch": 1.9516027655562538, | |
| "grad_norm": 0.05493108241819906, | |
| "learning_rate": 3.28118453186021e-05, | |
| "loss": 0.0957, | |
| "step": 9315 | |
| }, | |
| { | |
| "epoch": 1.9798868636077938, | |
| "grad_norm": 0.05505518318771863, | |
| "learning_rate": 3.1276200237750355e-05, | |
| "loss": 0.0955, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 2.0081709616593337, | |
| "grad_norm": 0.05306378687028771, | |
| "learning_rate": 2.976083284388031e-05, | |
| "loss": 0.0906, | |
| "step": 9585 | |
| }, | |
| { | |
| "epoch": 2.0364550597108737, | |
| "grad_norm": 0.05130179788880526, | |
| "learning_rate": 2.8267384264651188e-05, | |
| "loss": 0.0784, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 2.0647391577624137, | |
| "grad_norm": 0.05335109388781897, | |
| "learning_rate": 2.679747188986622e-05, | |
| "loss": 0.0782, | |
| "step": 9855 | |
| }, | |
| { | |
| "epoch": 2.0930232558139537, | |
| "grad_norm": 0.0513888628362633, | |
| "learning_rate": 2.53526876198557e-05, | |
| "loss": 0.0778, | |
| "step": 9990 | |
| }, | |
| { | |
| "epoch": 2.121307353865493, | |
| "grad_norm": 0.055136967997034346, | |
| "learning_rate": 2.3934596141465028e-05, | |
| "loss": 0.0778, | |
| "step": 10125 | |
| }, | |
| { | |
| "epoch": 2.149591451917033, | |
| "grad_norm": 0.05620080185680983, | |
| "learning_rate": 2.254473323351446e-05, | |
| "loss": 0.0777, | |
| "step": 10260 | |
| }, | |
| { | |
| "epoch": 2.177875549968573, | |
| "grad_norm": 0.05464669812207657, | |
| "learning_rate": 2.1184604103566198e-05, | |
| "loss": 0.0774, | |
| "step": 10395 | |
| }, | |
| { | |
| "epoch": 2.206159648020113, | |
| "grad_norm": 0.05398354993752342, | |
| "learning_rate": 1.9855681757799664e-05, | |
| "loss": 0.0774, | |
| "step": 10530 | |
| }, | |
| { | |
| "epoch": 2.234443746071653, | |
| "grad_norm": 0.055897809997969714, | |
| "learning_rate": 1.8559405405760584e-05, | |
| "loss": 0.0772, | |
| "step": 10665 | |
| }, | |
| { | |
| "epoch": 2.262727844123193, | |
| "grad_norm": 0.05732086913703312, | |
| "learning_rate": 1.729717890171157e-05, | |
| "loss": 0.0767, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 2.291011942174733, | |
| "grad_norm": 0.0565406180756469, | |
| "learning_rate": 1.607036922427203e-05, | |
| "loss": 0.0765, | |
| "step": 10935 | |
| }, | |
| { | |
| "epoch": 2.3192960402262726, | |
| "grad_norm": 0.057074660751889154, | |
| "learning_rate": 1.4880304995994099e-05, | |
| "loss": 0.0765, | |
| "step": 11070 | |
| }, | |
| { | |
| "epoch": 2.3475801382778125, | |
| "grad_norm": 0.05609805473293312, | |
| "learning_rate": 1.3728275044477673e-05, | |
| "loss": 0.0762, | |
| "step": 11205 | |
| }, | |
| { | |
| "epoch": 2.3758642363293525, | |
| "grad_norm": 0.05516011004588835, | |
| "learning_rate": 1.2615527006583178e-05, | |
| "loss": 0.0763, | |
| "step": 11340 | |
| }, | |
| { | |
| "epoch": 2.4041483343808925, | |
| "grad_norm": 0.05722853773387842, | |
| "learning_rate": 1.1543265977253332e-05, | |
| "loss": 0.0762, | |
| "step": 11475 | |
| }, | |
| { | |
| "epoch": 2.4324324324324325, | |
| "grad_norm": 0.0587961606821977, | |
| "learning_rate": 1.0512653204407463e-05, | |
| "loss": 0.0757, | |
| "step": 11610 | |
| }, | |
| { | |
| "epoch": 2.4607165304839724, | |
| "grad_norm": 0.0569771841520313, | |
| "learning_rate": 9.524804831321604e-06, | |
| "loss": 0.0759, | |
| "step": 11745 | |
| }, | |
| { | |
| "epoch": 2.4890006285355124, | |
| "grad_norm": 0.059353282770618576, | |
| "learning_rate": 8.580790687856661e-06, | |
| "loss": 0.0756, | |
| "step": 11880 | |
| }, | |
| { | |
| "epoch": 2.517284726587052, | |
| "grad_norm": 0.05454050877182314, | |
| "learning_rate": 7.68163313184333e-06, | |
| "loss": 0.0754, | |
| "step": 12015 | |
| }, | |
| { | |
| "epoch": 2.5455688246385924, | |
| "grad_norm": 0.05815652945221045, | |
| "learning_rate": 6.828305941878904e-06, | |
| "loss": 0.0752, | |
| "step": 12150 | |
| }, | |
| { | |
| "epoch": 2.573852922690132, | |
| "grad_norm": 0.057648681945551325, | |
| "learning_rate": 6.021733262734758e-06, | |
| "loss": 0.075, | |
| "step": 12285 | |
| }, | |
| { | |
| "epoch": 2.602137020741672, | |
| "grad_norm": 0.05540948539481964, | |
| "learning_rate": 5.262788604516944e-06, | |
| "loss": 0.075, | |
| "step": 12420 | |
| }, | |
| { | |
| "epoch": 2.630421118793212, | |
| "grad_norm": 0.06718456332465579, | |
| "learning_rate": 4.552293896663451e-06, | |
| "loss": 0.075, | |
| "step": 12555 | |
| }, | |
| { | |
| "epoch": 2.658705216844752, | |
| "grad_norm": 0.07105890518243332, | |
| "learning_rate": 3.8910185978029314e-06, | |
| "loss": 0.0748, | |
| "step": 12690 | |
| }, | |
| { | |
| "epoch": 2.686989314896292, | |
| "grad_norm": 0.05567353880578137, | |
| "learning_rate": 3.2796788624387066e-06, | |
| "loss": 0.0748, | |
| "step": 12825 | |
| }, | |
| { | |
| "epoch": 2.7152734129478313, | |
| "grad_norm": 0.05612203710501599, | |
| "learning_rate": 2.71893676536063e-06, | |
| "loss": 0.0749, | |
| "step": 12960 | |
| }, | |
| { | |
| "epoch": 2.7435575109993717, | |
| "grad_norm": 0.05673870284934767, | |
| "learning_rate": 2.209399584624794e-06, | |
| "loss": 0.0748, | |
| "step": 13095 | |
| }, | |
| { | |
| "epoch": 2.7718416090509113, | |
| "grad_norm": 0.05761628274356501, | |
| "learning_rate": 1.7516191438774588e-06, | |
| "loss": 0.0747, | |
| "step": 13230 | |
| }, | |
| { | |
| "epoch": 2.8001257071024512, | |
| "grad_norm": 0.054886010400605305, | |
| "learning_rate": 1.3460912147355787e-06, | |
| "loss": 0.0746, | |
| "step": 13365 | |
| }, | |
| { | |
| "epoch": 2.828409805153991, | |
| "grad_norm": 0.05340561070795086, | |
| "learning_rate": 9.932549798711443e-07, | |
| "loss": 0.0746, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 2.856693903205531, | |
| "grad_norm": 0.059674346591722494, | |
| "learning_rate": 6.934925573807704e-07, | |
| "loss": 0.0747, | |
| "step": 13635 | |
| }, | |
| { | |
| "epoch": 2.884978001257071, | |
| "grad_norm": 0.058825608417044505, | |
| "learning_rate": 4.4712858695560856e-07, | |
| "loss": 0.0742, | |
| "step": 13770 | |
| }, | |
| { | |
| "epoch": 2.9132620993086107, | |
| "grad_norm": 0.05775417712295375, | |
| "learning_rate": 2.5442987829985556e-07, | |
| "loss": 0.0745, | |
| "step": 13905 | |
| }, | |
| { | |
| "epoch": 2.941546197360151, | |
| "grad_norm": 0.056228990397294835, | |
| "learning_rate": 1.1560512217849707e-07, | |
| "loss": 0.0747, | |
| "step": 14040 | |
| }, | |
| { | |
| "epoch": 2.9698302954116906, | |
| "grad_norm": 0.056212393072501816, | |
| "learning_rate": 3.080466440732455e-08, | |
| "loss": 0.0743, | |
| "step": 14175 | |
| }, | |
| { | |
| "epoch": 2.9981143934632306, | |
| "grad_norm": 0.056538060201727615, | |
| "learning_rate": 1.2034302991903445e-10, | |
| "loss": 0.0745, | |
| "step": 14310 | |
| } | |
| ], | |
| "logging_steps": 135, | |
| "max_steps": 14319, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 54000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.413317465141412e+17, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |