{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.6618863761720905, "eval_steps": 300.0, "global_step": 600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0011031439602868175, "grad_norm": 80.42967325823248, "learning_rate": 2.3e-06, "loss": 1.6655548810958862, "step": 1, "token_acc": 0.6208588957055214 }, { "epoch": 0.005515719801434087, "grad_norm": 12.936447236966638, "learning_rate": 2.3e-06, "loss": 1.6539018154144287, "step": 5, "token_acc": 0.6644312612844984 }, { "epoch": 0.011031439602868174, "grad_norm": 7.485145348211075, "learning_rate": 2.3e-06, "loss": 1.1399970054626465, "step": 10, "token_acc": 0.716547901821061 }, { "epoch": 0.01654715940430226, "grad_norm": 7.10761756326815, "learning_rate": 2.3e-06, "loss": 1.1828346252441406, "step": 15, "token_acc": 0.6928796755295178 }, { "epoch": 0.02206287920573635, "grad_norm": 5.659684098393193, "learning_rate": 2.3e-06, "loss": 1.0604573249816895, "step": 20, "token_acc": 0.7371653156472611 }, { "epoch": 0.027578599007170437, "grad_norm": 6.432203040875114, "learning_rate": 2.3e-06, "loss": 1.0374173164367675, "step": 25, "token_acc": 0.7369089984748348 }, { "epoch": 0.03309431880860452, "grad_norm": 6.0637637354614125, "learning_rate": 2.3e-06, "loss": 0.9596109390258789, "step": 30, "token_acc": 0.7449127906976745 }, { "epoch": 0.03861003861003861, "grad_norm": 5.9792048911606335, "learning_rate": 2.3e-06, "loss": 0.9912214279174805, "step": 35, "token_acc": 0.7413617886178862 }, { "epoch": 0.0441257584114727, "grad_norm": 6.031366828095079, "learning_rate": 2.3e-06, "loss": 1.0473726272583008, "step": 40, "token_acc": 0.727144535840188 }, { "epoch": 0.049641478212906785, "grad_norm": 6.186527950833231, "learning_rate": 2.3e-06, "loss": 0.9992570877075195, "step": 45, "token_acc": 0.7360217714002969 }, { "epoch": 0.05515719801434087, "grad_norm": 5.923085345386395, "learning_rate": 2.3e-06, "loss": 1.002680206298828, "step": 50, "token_acc": 0.7391618497109826 }, { "epoch": 0.06067291781577496, "grad_norm": 4.9260097226563255, "learning_rate": 2.3e-06, "loss": 0.8863202095031738, "step": 55, "token_acc": 0.7527737578388809 }, { "epoch": 0.06618863761720904, "grad_norm": 5.560254131314214, "learning_rate": 2.3e-06, "loss": 0.9248697280883789, "step": 60, "token_acc": 0.7579394848712178 }, { "epoch": 0.07170435741864313, "grad_norm": 5.180260599645047, "learning_rate": 2.3e-06, "loss": 1.004658317565918, "step": 65, "token_acc": 0.7360208062418726 }, { "epoch": 0.07722007722007722, "grad_norm": 6.299333554712354, "learning_rate": 2.3e-06, "loss": 0.9439926147460938, "step": 70, "token_acc": 0.751812046988253 }, { "epoch": 0.0827357970215113, "grad_norm": 5.622631000909693, "learning_rate": 2.3e-06, "loss": 0.8676441192626954, "step": 75, "token_acc": 0.760814889336016 }, { "epoch": 0.0882515168229454, "grad_norm": 5.613397002114218, "learning_rate": 2.3e-06, "loss": 0.8931824684143066, "step": 80, "token_acc": 0.7616038882138517 }, { "epoch": 0.09376723662437948, "grad_norm": 5.897499862699952, "learning_rate": 2.3e-06, "loss": 1.0003661155700683, "step": 85, "token_acc": 0.7361477572559367 }, { "epoch": 0.09928295642581357, "grad_norm": 5.838989842949948, "learning_rate": 2.3e-06, "loss": 0.8919829368591309, "step": 90, "token_acc": 0.7569311663479924 }, { "epoch": 0.10479867622724766, "grad_norm": 5.732772113211347, "learning_rate": 2.3e-06, "loss": 0.9857375144958496, "step": 95, "token_acc": 0.7319040543409042 }, { "epoch": 0.11031439602868175, "grad_norm": 5.887155914575355, "learning_rate": 2.3e-06, "loss": 0.9643180847167969, "step": 100, "token_acc": 0.7468728678644531 }, { "epoch": 0.11583011583011583, "grad_norm": 5.409153898947996, "learning_rate": 2.3e-06, "loss": 0.9775169372558594, "step": 105, "token_acc": 0.7443522360534809 }, { "epoch": 0.12134583563154992, "grad_norm": 5.738347427048291, "learning_rate": 2.3e-06, "loss": 1.0265106201171874, "step": 110, "token_acc": 0.7265641025641025 }, { "epoch": 0.126861555432984, "grad_norm": 5.38681474950728, "learning_rate": 2.3e-06, "loss": 0.9541057586669922, "step": 115, "token_acc": 0.747302805082714 }, { "epoch": 0.13237727523441808, "grad_norm": 5.856303394018206, "learning_rate": 2.3e-06, "loss": 0.8228609085083007, "step": 120, "token_acc": 0.7779299014238773 }, { "epoch": 0.13789299503585217, "grad_norm": 6.568516319919617, "learning_rate": 2.3e-06, "loss": 0.9721288681030273, "step": 125, "token_acc": 0.7395016151361329 }, { "epoch": 0.14340871483728626, "grad_norm": 5.54045872938136, "learning_rate": 2.3e-06, "loss": 0.9882600784301758, "step": 130, "token_acc": 0.7350409836065573 }, { "epoch": 0.14892443463872035, "grad_norm": 5.612320181227532, "learning_rate": 2.3e-06, "loss": 0.9342703819274902, "step": 135, "token_acc": 0.7464104263309035 }, { "epoch": 0.15444015444015444, "grad_norm": 5.925987985108847, "learning_rate": 2.3e-06, "loss": 0.9266027450561524, "step": 140, "token_acc": 0.7487753673897831 }, { "epoch": 0.15995587424158852, "grad_norm": 5.59653045493395, "learning_rate": 2.3e-06, "loss": 0.8937458038330078, "step": 145, "token_acc": 0.7624970664163342 }, { "epoch": 0.1654715940430226, "grad_norm": 5.374039667260155, "learning_rate": 2.3e-06, "loss": 0.8537099838256836, "step": 150, "token_acc": 0.7640309304065852 }, { "epoch": 0.1709873138444567, "grad_norm": 5.376937826461383, "learning_rate": 2.3e-06, "loss": 0.8238465309143066, "step": 155, "token_acc": 0.7761599210266535 }, { "epoch": 0.1765030336458908, "grad_norm": 6.0245180163205285, "learning_rate": 2.3e-06, "loss": 0.8189101219177246, "step": 160, "token_acc": 0.7774302840761012 }, { "epoch": 0.18201875344732488, "grad_norm": 5.571460118689288, "learning_rate": 2.3e-06, "loss": 0.8788368225097656, "step": 165, "token_acc": 0.760459995106435 }, { "epoch": 0.18753447324875896, "grad_norm": 5.088793082374291, "learning_rate": 2.3e-06, "loss": 0.8897226333618165, "step": 170, "token_acc": 0.7589285714285714 }, { "epoch": 0.19305019305019305, "grad_norm": 5.578076711526945, "learning_rate": 2.3e-06, "loss": 0.8561611175537109, "step": 175, "token_acc": 0.7607285429141717 }, { "epoch": 0.19856591285162714, "grad_norm": 5.81713562547314, "learning_rate": 2.3e-06, "loss": 0.8790461540222168, "step": 180, "token_acc": 0.753576372865713 }, { "epoch": 0.20408163265306123, "grad_norm": 5.254765297480428, "learning_rate": 2.3e-06, "loss": 0.7894742965698243, "step": 185, "token_acc": 0.7727930535455861 }, { "epoch": 0.20959735245449532, "grad_norm": 5.076375360468776, "learning_rate": 2.3e-06, "loss": 0.8446966171264648, "step": 190, "token_acc": 0.7672064777327935 }, { "epoch": 0.2151130722559294, "grad_norm": 5.763028979469674, "learning_rate": 2.3e-06, "loss": 0.961794662475586, "step": 195, "token_acc": 0.7398701589433624 }, { "epoch": 0.2206287920573635, "grad_norm": 5.351427081900536, "learning_rate": 2.3e-06, "loss": 0.9339935302734375, "step": 200, "token_acc": 0.7431607506217499 }, { "epoch": 0.22614451185879758, "grad_norm": 5.713651299205875, "learning_rate": 2.3e-06, "loss": 0.918415641784668, "step": 205, "token_acc": 0.7481903926299627 }, { "epoch": 0.23166023166023167, "grad_norm": 5.8891737450130535, "learning_rate": 2.3e-06, "loss": 0.9010303497314454, "step": 210, "token_acc": 0.7571428571428571 }, { "epoch": 0.23717595146166576, "grad_norm": 5.4902683255452915, "learning_rate": 2.3e-06, "loss": 0.8169702529907227, "step": 215, "token_acc": 0.7799597180261832 }, { "epoch": 0.24269167126309985, "grad_norm": 5.777757326601303, "learning_rate": 2.3e-06, "loss": 0.9132566452026367, "step": 220, "token_acc": 0.7451829400303096 }, { "epoch": 0.24820739106453393, "grad_norm": 5.440176086672954, "learning_rate": 2.3e-06, "loss": 0.9655065536499023, "step": 225, "token_acc": 0.7356965174129353 }, { "epoch": 0.253723110865968, "grad_norm": 5.65109550423067, "learning_rate": 2.3e-06, "loss": 0.9338722229003906, "step": 230, "token_acc": 0.740830755634114 }, { "epoch": 0.2592388306674021, "grad_norm": 5.558767495868624, "learning_rate": 2.3e-06, "loss": 0.9302779197692871, "step": 235, "token_acc": 0.7472228044435129 }, { "epoch": 0.26475455046883617, "grad_norm": 5.0652164247691, "learning_rate": 2.3e-06, "loss": 0.8661215782165528, "step": 240, "token_acc": 0.7648968235566891 }, { "epoch": 0.2702702702702703, "grad_norm": 5.301144006085399, "learning_rate": 2.3e-06, "loss": 0.9218810081481934, "step": 245, "token_acc": 0.7493386243386243 }, { "epoch": 0.27578599007170435, "grad_norm": 5.6495062752144625, "learning_rate": 2.3e-06, "loss": 0.7810210227966309, "step": 250, "token_acc": 0.7860394537177542 }, { "epoch": 0.28130170987313846, "grad_norm": 5.522127957882286, "learning_rate": 2.3e-06, "loss": 0.8038553237915039, "step": 255, "token_acc": 0.7727514635444386 }, { "epoch": 0.2868174296745725, "grad_norm": 5.2610053652109405, "learning_rate": 2.3e-06, "loss": 0.8923781394958497, "step": 260, "token_acc": 0.7542778918548939 }, { "epoch": 0.29233314947600664, "grad_norm": 5.613909953834712, "learning_rate": 2.3e-06, "loss": 0.8282554626464844, "step": 265, "token_acc": 0.7708383377372088 }, { "epoch": 0.2978488692774407, "grad_norm": 4.960174308621677, "learning_rate": 2.3e-06, "loss": 0.939533805847168, "step": 270, "token_acc": 0.745945945945946 }, { "epoch": 0.3033645890788748, "grad_norm": 5.338393166552948, "learning_rate": 2.3e-06, "loss": 0.7566696166992187, "step": 275, "token_acc": 0.7783882783882784 }, { "epoch": 0.3088803088803089, "grad_norm": 5.85539301408898, "learning_rate": 2.3e-06, "loss": 0.7774906158447266, "step": 280, "token_acc": 0.778774673160728 }, { "epoch": 0.314396028681743, "grad_norm": 5.5554378762103305, "learning_rate": 2.3e-06, "loss": 0.8302905082702636, "step": 285, "token_acc": 0.7736331119059785 }, { "epoch": 0.31991174848317705, "grad_norm": 5.64607545742485, "learning_rate": 2.3e-06, "loss": 0.8949955940246582, "step": 290, "token_acc": 0.7522200563136235 }, { "epoch": 0.32542746828461117, "grad_norm": 5.545223764081705, "learning_rate": 2.3e-06, "loss": 0.9496533393859863, "step": 295, "token_acc": 0.739816799844085 }, { "epoch": 0.3309431880860452, "grad_norm": 5.102348588101583, "learning_rate": 2.3e-06, "loss": 0.8521288871765137, "step": 300, "token_acc": 0.7637759710930443 }, { "epoch": 0.33645890788747934, "grad_norm": 5.799379698214005, "learning_rate": 2.3e-06, "loss": 0.8561582565307617, "step": 305, "token_acc": 0.7558797909407665 }, { "epoch": 0.3419746276889134, "grad_norm": 5.245842611420519, "learning_rate": 2.3e-06, "loss": 0.855079460144043, "step": 310, "token_acc": 0.7621547591874002 }, { "epoch": 0.3474903474903475, "grad_norm": 5.339700948065717, "learning_rate": 2.3e-06, "loss": 0.8615094184875488, "step": 315, "token_acc": 0.7575689616505943 }, { "epoch": 0.3530060672917816, "grad_norm": 5.608312178321082, "learning_rate": 2.3e-06, "loss": 0.8664688110351563, "step": 320, "token_acc": 0.7657678244972578 }, { "epoch": 0.35852178709321564, "grad_norm": 5.41193963454708, "learning_rate": 2.3e-06, "loss": 0.83935546875, "step": 325, "token_acc": 0.7757989753598439 }, { "epoch": 0.36403750689464975, "grad_norm": 5.220504527588121, "learning_rate": 2.3e-06, "loss": 0.8449893951416015, "step": 330, "token_acc": 0.772705078125 }, { "epoch": 0.3695532266960838, "grad_norm": 5.4190449827537135, "learning_rate": 2.3e-06, "loss": 0.8168981552124024, "step": 335, "token_acc": 0.7692491630798661 }, { "epoch": 0.37506894649751793, "grad_norm": 5.590435008250206, "learning_rate": 2.3e-06, "loss": 0.8648731231689453, "step": 340, "token_acc": 0.7575426322693485 }, { "epoch": 0.380584666298952, "grad_norm": 5.174005161957214, "learning_rate": 2.3e-06, "loss": 0.9896360397338867, "step": 345, "token_acc": 0.7302983932670237 }, { "epoch": 0.3861003861003861, "grad_norm": 5.511715259575185, "learning_rate": 2.3e-06, "loss": 0.8663737297058105, "step": 350, "token_acc": 0.7572226268511775 }, { "epoch": 0.39161610590182017, "grad_norm": 5.295113853870287, "learning_rate": 2.3e-06, "loss": 0.8019681930541992, "step": 355, "token_acc": 0.7747044917257683 }, { "epoch": 0.3971318257032543, "grad_norm": 5.2190425564256415, "learning_rate": 2.3e-06, "loss": 0.8304034233093261, "step": 360, "token_acc": 0.7567389875082183 }, { "epoch": 0.40264754550468834, "grad_norm": 5.594923827868745, "learning_rate": 2.3e-06, "loss": 0.8835367202758789, "step": 365, "token_acc": 0.7492839832562238 }, { "epoch": 0.40816326530612246, "grad_norm": 5.399822330777811, "learning_rate": 2.3e-06, "loss": 0.8703033447265625, "step": 370, "token_acc": 0.7619047619047619 }, { "epoch": 0.4136789851075565, "grad_norm": 5.718517969581787, "learning_rate": 2.3e-06, "loss": 0.7746540069580078, "step": 375, "token_acc": 0.7788509162951956 }, { "epoch": 0.41919470490899063, "grad_norm": 5.358207310874379, "learning_rate": 2.3e-06, "loss": 0.7805415630340576, "step": 380, "token_acc": 0.7823351786612984 }, { "epoch": 0.4247104247104247, "grad_norm": 5.240031526062139, "learning_rate": 2.3e-06, "loss": 0.892514419555664, "step": 385, "token_acc": 0.7574438845625286 }, { "epoch": 0.4302261445118588, "grad_norm": 5.090203604118294, "learning_rate": 2.3e-06, "loss": 0.7661482810974121, "step": 390, "token_acc": 0.7836170737725142 }, { "epoch": 0.43574186431329287, "grad_norm": 5.270675577305129, "learning_rate": 2.3e-06, "loss": 0.7520760059356689, "step": 395, "token_acc": 0.7893167028199566 }, { "epoch": 0.441257584114727, "grad_norm": 5.4625302980431165, "learning_rate": 2.3e-06, "loss": 0.8551373481750488, "step": 400, "token_acc": 0.7630287450338864 }, { "epoch": 0.44677330391616105, "grad_norm": 5.249069454600804, "learning_rate": 2.3e-06, "loss": 0.910746955871582, "step": 405, "token_acc": 0.7516659262549977 }, { "epoch": 0.45228902371759516, "grad_norm": 5.236854652942137, "learning_rate": 2.3e-06, "loss": 0.8199291229248047, "step": 410, "token_acc": 0.7682183767974653 }, { "epoch": 0.4578047435190292, "grad_norm": 6.027185729170618, "learning_rate": 2.3e-06, "loss": 0.8450562477111816, "step": 415, "token_acc": 0.7607934655775963 }, { "epoch": 0.46332046332046334, "grad_norm": 4.870989267188852, "learning_rate": 2.3e-06, "loss": 0.9044759750366211, "step": 420, "token_acc": 0.7498462169366413 }, { "epoch": 0.4688361831218974, "grad_norm": 5.291951695042281, "learning_rate": 2.3e-06, "loss": 0.8467486381530762, "step": 425, "token_acc": 0.7576541612764123 }, { "epoch": 0.4743519029233315, "grad_norm": 5.62593903563829, "learning_rate": 2.3e-06, "loss": 0.8833629608154296, "step": 430, "token_acc": 0.7509285051067781 }, { "epoch": 0.4798676227247656, "grad_norm": 5.128799172526207, "learning_rate": 2.3e-06, "loss": 0.8988676071166992, "step": 435, "token_acc": 0.7490849938999593 }, { "epoch": 0.4853833425261997, "grad_norm": 5.239699648307152, "learning_rate": 2.3e-06, "loss": 0.7166192054748535, "step": 440, "token_acc": 0.7969401947148818 }, { "epoch": 0.49089906232763375, "grad_norm": 5.4681083200632905, "learning_rate": 2.3e-06, "loss": 0.9467007637023925, "step": 445, "token_acc": 0.7396415494314897 }, { "epoch": 0.49641478212906787, "grad_norm": 4.685989886939226, "learning_rate": 2.3e-06, "loss": 0.7727291584014893, "step": 450, "token_acc": 0.7793472966390648 }, { "epoch": 0.5019305019305019, "grad_norm": 5.538988406196133, "learning_rate": 2.3e-06, "loss": 0.8206657409667969, "step": 455, "token_acc": 0.7731393129770993 }, { "epoch": 0.507446221731936, "grad_norm": 4.63939429197835, "learning_rate": 2.3e-06, "loss": 0.8562620162963868, "step": 460, "token_acc": 0.7662946428571429 }, { "epoch": 0.5129619415333702, "grad_norm": 5.1240560151191445, "learning_rate": 2.3e-06, "loss": 0.7872621536254882, "step": 465, "token_acc": 0.7787717601547389 }, { "epoch": 0.5184776613348042, "grad_norm": 5.189600840792027, "learning_rate": 2.3e-06, "loss": 0.8223024368286133, "step": 470, "token_acc": 0.7725631768953068 }, { "epoch": 0.5239933811362383, "grad_norm": 4.625349854858132, "learning_rate": 2.3e-06, "loss": 0.7645779609680176, "step": 475, "token_acc": 0.7779833487511564 }, { "epoch": 0.5295091009376723, "grad_norm": 5.392461676265602, "learning_rate": 2.3e-06, "loss": 0.863577938079834, "step": 480, "token_acc": 0.7583280288563548 }, { "epoch": 0.5350248207391064, "grad_norm": 5.809387834675999, "learning_rate": 2.3e-06, "loss": 0.7966668128967285, "step": 485, "token_acc": 0.7756534365924492 }, { "epoch": 0.5405405405405406, "grad_norm": 5.304450067691333, "learning_rate": 2.3e-06, "loss": 0.9444230079650879, "step": 490, "token_acc": 0.7450345423143351 }, { "epoch": 0.5460562603419746, "grad_norm": 5.583804788469441, "learning_rate": 2.3e-06, "loss": 0.9105037689208985, "step": 495, "token_acc": 0.7439771335238873 }, { "epoch": 0.5515719801434087, "grad_norm": 6.054938244093431, "learning_rate": 2.3e-06, "loss": 0.9297395706176758, "step": 500, "token_acc": 0.7482532288799492 }, { "epoch": 0.5570876999448428, "grad_norm": 5.391503075184599, "learning_rate": 2.3e-06, "loss": 0.846977424621582, "step": 505, "token_acc": 0.7518848526387937 }, { "epoch": 0.5626034197462769, "grad_norm": 5.030658472043065, "learning_rate": 2.3e-06, "loss": 0.8991237640380859, "step": 510, "token_acc": 0.7539121114683816 }, { "epoch": 0.568119139547711, "grad_norm": 5.104958695838692, "learning_rate": 2.3e-06, "loss": 0.8426759719848633, "step": 515, "token_acc": 0.7644656228727025 }, { "epoch": 0.573634859349145, "grad_norm": 6.009882223493915, "learning_rate": 2.3e-06, "loss": 0.7240866661071778, "step": 520, "token_acc": 0.7948649371489703 }, { "epoch": 0.5791505791505791, "grad_norm": 5.637457445639113, "learning_rate": 2.3e-06, "loss": 0.8636373519897461, "step": 525, "token_acc": 0.7649465787679018 }, { "epoch": 0.5846662989520133, "grad_norm": 5.3912830874814555, "learning_rate": 2.3e-06, "loss": 0.8545609474182129, "step": 530, "token_acc": 0.7600548446069469 }, { "epoch": 0.5901820187534473, "grad_norm": 5.223533669476021, "learning_rate": 2.3e-06, "loss": 0.7941525936126709, "step": 535, "token_acc": 0.7761698022190062 }, { "epoch": 0.5956977385548814, "grad_norm": 5.115121860566828, "learning_rate": 2.3e-06, "loss": 0.8650318145751953, "step": 540, "token_acc": 0.7646005817856344 }, { "epoch": 0.6012134583563155, "grad_norm": 4.715835836641467, "learning_rate": 2.3e-06, "loss": 0.8254315376281738, "step": 545, "token_acc": 0.7698187377156585 }, { "epoch": 0.6067291781577496, "grad_norm": 5.200096789536537, "learning_rate": 2.3e-06, "loss": 0.7865041255950928, "step": 550, "token_acc": 0.7788415509813308 }, { "epoch": 0.6122448979591837, "grad_norm": 5.62598549880936, "learning_rate": 2.3e-06, "loss": 0.9002869606018067, "step": 555, "token_acc": 0.7615146147032772 }, { "epoch": 0.6177606177606177, "grad_norm": 6.376332687516541, "learning_rate": 2.3e-06, "loss": 0.8603617668151855, "step": 560, "token_acc": 0.758905299739357 }, { "epoch": 0.6232763375620518, "grad_norm": 5.370356585251875, "learning_rate": 2.3e-06, "loss": 0.8701972961425781, "step": 565, "token_acc": 0.7542935653766888 }, { "epoch": 0.628792057363486, "grad_norm": 4.884591590707582, "learning_rate": 2.3e-06, "loss": 0.840721321105957, "step": 570, "token_acc": 0.7689443714410863 }, { "epoch": 0.63430777716492, "grad_norm": 5.138799986301503, "learning_rate": 2.3e-06, "loss": 0.7376739501953125, "step": 575, "token_acc": 0.7904834996162702 }, { "epoch": 0.6398234969663541, "grad_norm": 5.3450150515914885, "learning_rate": 2.3e-06, "loss": 0.755347728729248, "step": 580, "token_acc": 0.7790931989924433 }, { "epoch": 0.6453392167677882, "grad_norm": 4.895436117128604, "learning_rate": 2.3e-06, "loss": 0.8265247344970703, "step": 585, "token_acc": 0.7729456991830851 }, { "epoch": 0.6508549365692223, "grad_norm": 5.143509534036041, "learning_rate": 2.3e-06, "loss": 0.8215790748596191, "step": 590, "token_acc": 0.7608391608391608 }, { "epoch": 0.6563706563706564, "grad_norm": 5.082286518166302, "learning_rate": 2.3e-06, "loss": 0.8152825355529785, "step": 595, "token_acc": 0.7685413399058929 }, { "epoch": 0.6618863761720905, "grad_norm": 5.326424315651092, "learning_rate": 2.3e-06, "loss": 0.8500395774841308, "step": 600, "token_acc": 0.7704613957801993 } ], "logging_steps": 5, "max_steps": 907, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 300, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1514130748997632.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }