{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9990167158308751,
  "eval_steps": 500,
  "global_step": 508,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0019665683382497543,
      "grad_norm": 2.135781083866852,
      "learning_rate": 1.9607843137254904e-07,
      "loss": 0.7446,
      "step": 1
    },
    {
      "epoch": 0.00983284169124877,
      "grad_norm": 1.9527848313268972,
      "learning_rate": 9.80392156862745e-07,
      "loss": 0.7167,
      "step": 5
    },
    {
      "epoch": 0.01966568338249754,
      "grad_norm": 0.8184732371481878,
      "learning_rate": 1.96078431372549e-06,
      "loss": 0.6108,
      "step": 10
    },
    {
      "epoch": 0.029498525073746312,
      "grad_norm": 1.5660153049042755,
      "learning_rate": 2.9411764705882355e-06,
      "loss": 0.3794,
      "step": 15
    },
    {
      "epoch": 0.03933136676499508,
      "grad_norm": 0.26637453068050115,
      "learning_rate": 3.92156862745098e-06,
      "loss": 0.1959,
      "step": 20
    },
    {
      "epoch": 0.049164208456243856,
      "grad_norm": 0.1832216140433864,
      "learning_rate": 4.901960784313726e-06,
      "loss": 0.1462,
      "step": 25
    },
    {
      "epoch": 0.058997050147492625,
      "grad_norm": 0.1221198931949252,
      "learning_rate": 5.882352941176471e-06,
      "loss": 0.1201,
      "step": 30
    },
    {
      "epoch": 0.0688298918387414,
      "grad_norm": 0.09902770217247771,
      "learning_rate": 6.862745098039216e-06,
      "loss": 0.109,
      "step": 35
    },
    {
      "epoch": 0.07866273352999016,
      "grad_norm": 0.09185267777375979,
      "learning_rate": 7.84313725490196e-06,
      "loss": 0.1015,
      "step": 40
    },
    {
      "epoch": 0.08849557522123894,
      "grad_norm": 0.09308570802648539,
      "learning_rate": 8.823529411764707e-06,
      "loss": 0.0911,
      "step": 45
    },
    {
      "epoch": 0.09832841691248771,
      "grad_norm": 0.10738807987147182,
      "learning_rate": 9.803921568627451e-06,
      "loss": 0.0882,
      "step": 50
    },
    {
      "epoch": 0.10816125860373647,
      "grad_norm": 0.08239929893607759,
      "learning_rate": 9.998109833891883e-06,
      "loss": 0.0853,
      "step": 55
    },
    {
      "epoch": 0.11799410029498525,
      "grad_norm": 0.08167501755971386,
      "learning_rate": 9.990433483284527e-06,
      "loss": 0.077,
      "step": 60
    },
    {
      "epoch": 0.127826941986234,
      "grad_norm": 0.059384138425133216,
      "learning_rate": 9.976861873982177e-06,
      "loss": 0.0771,
      "step": 65
    },
    {
      "epoch": 0.1376597836774828,
      "grad_norm": 0.079566577422055,
      "learning_rate": 9.95741103828905e-06,
      "loss": 0.0751,
      "step": 70
    },
    {
      "epoch": 0.14749262536873156,
      "grad_norm": 0.0828234494820707,
      "learning_rate": 9.932103953709724e-06,
      "loss": 0.0672,
      "step": 75
    },
    {
      "epoch": 0.15732546705998032,
      "grad_norm": 0.06187844995145561,
      "learning_rate": 9.900970515805564e-06,
      "loss": 0.0746,
      "step": 80
    },
    {
      "epoch": 0.1671583087512291,
      "grad_norm": 0.0479978084973539,
      "learning_rate": 9.864047502878717e-06,
      "loss": 0.0662,
      "step": 85
    },
    {
      "epoch": 0.17699115044247787,
      "grad_norm": 0.05889595843389217,
      "learning_rate": 9.821378532525479e-06,
      "loss": 0.0701,
      "step": 90
    },
    {
      "epoch": 0.18682399213372664,
      "grad_norm": 0.05716126856265098,
      "learning_rate": 9.773014010110298e-06,
      "loss": 0.0682,
      "step": 95
    },
    {
      "epoch": 0.19665683382497542,
      "grad_norm": 0.05712073501290371,
      "learning_rate": 9.719011069221316e-06,
      "loss": 0.068,
      "step": 100
    },
    {
      "epoch": 0.20648967551622419,
      "grad_norm": 0.048973712798623205,
      "learning_rate": 9.659433504177786e-06,
      "loss": 0.0606,
      "step": 105
    },
    {
      "epoch": 0.21632251720747295,
      "grad_norm": 0.054348545387946076,
      "learning_rate": 9.59435169466907e-06,
      "loss": 0.0632,
      "step": 110
    },
    {
      "epoch": 0.22615535889872174,
      "grad_norm": 0.04960893787310499,
      "learning_rate": 9.523842522614285e-06,
      "loss": 0.0673,
      "step": 115
    },
    {
      "epoch": 0.2359882005899705,
      "grad_norm": 0.04789803334572793,
      "learning_rate": 9.447989281340753e-06,
      "loss": 0.0656,
      "step": 120
    },
    {
      "epoch": 0.24582104228121926,
      "grad_norm": 0.04982130031713185,
      "learning_rate": 9.36688157718862e-06,
      "loss": 0.0663,
      "step": 125
    },
    {
      "epoch": 0.255653883972468,
      "grad_norm": 0.0526672200331979,
      "learning_rate": 9.280615223657801e-06,
      "loss": 0.0616,
      "step": 130
    },
    {
      "epoch": 0.26548672566371684,
      "grad_norm": 0.04814394198670065,
      "learning_rate": 9.189292128222355e-06,
      "loss": 0.0644,
      "step": 135
    },
    {
      "epoch": 0.2753195673549656,
      "grad_norm": 0.04944588595233738,
      "learning_rate": 9.093020171945966e-06,
      "loss": 0.0634,
      "step": 140
    },
    {
      "epoch": 0.28515240904621436,
      "grad_norm": 0.050524994827668604,
      "learning_rate": 8.991913082040752e-06,
      "loss": 0.0604,
      "step": 145
    },
    {
      "epoch": 0.2949852507374631,
      "grad_norm": 0.05719051920149237,
      "learning_rate": 8.886090297519956e-06,
      "loss": 0.0613,
      "step": 150
    },
    {
      "epoch": 0.3048180924287119,
      "grad_norm": 0.05055060203841477,
      "learning_rate": 8.775676828103205e-06,
      "loss": 0.0595,
      "step": 155
    },
    {
      "epoch": 0.31465093411996065,
      "grad_norm": 0.051603882559381635,
      "learning_rate": 8.660803106541044e-06,
      "loss": 0.0576,
      "step": 160
    },
    {
      "epoch": 0.32448377581120946,
      "grad_norm": 0.04922472616683421,
      "learning_rate": 8.541604834533159e-06,
      "loss": 0.0587,
      "step": 165
    },
    {
      "epoch": 0.3343166175024582,
      "grad_norm": 0.05056885581987987,
      "learning_rate": 8.418222822422348e-06,
      "loss": 0.0571,
      "step": 170
    },
    {
      "epoch": 0.344149459193707,
      "grad_norm": 0.04783324139240484,
      "learning_rate": 8.290802822853576e-06,
      "loss": 0.061,
      "step": 175
    },
    {
      "epoch": 0.35398230088495575,
      "grad_norm": 0.04560236504307446,
      "learning_rate": 8.159495358594627e-06,
      "loss": 0.0564,
      "step": 180
    },
    {
      "epoch": 0.3638151425762045,
      "grad_norm": 0.046817213325705334,
      "learning_rate": 8.024455544721778e-06,
      "loss": 0.058,
      "step": 185
    },
    {
      "epoch": 0.37364798426745327,
      "grad_norm": 0.04895904933788684,
      "learning_rate": 7.88584290538049e-06,
      "loss": 0.0596,
      "step": 190
    },
    {
      "epoch": 0.3834808259587021,
      "grad_norm": 0.050434848029315464,
      "learning_rate": 7.743821185337634e-06,
      "loss": 0.0559,
      "step": 195
    },
    {
      "epoch": 0.39331366764995085,
      "grad_norm": 0.047900027973270344,
      "learning_rate": 7.598558156547842e-06,
      "loss": 0.0527,
      "step": 200
    },
    {
      "epoch": 0.4031465093411996,
      "grad_norm": 0.04478166708260688,
      "learning_rate": 7.450225419962498e-06,
      "loss": 0.0521,
      "step": 205
    },
    {
      "epoch": 0.41297935103244837,
      "grad_norm": 0.043916229955916784,
      "learning_rate": 7.298998202815474e-06,
      "loss": 0.0522,
      "step": 210
    },
    {
      "epoch": 0.42281219272369713,
      "grad_norm": 0.050206544286328425,
      "learning_rate": 7.145055151625113e-06,
      "loss": 0.0566,
      "step": 215
    },
    {
      "epoch": 0.4326450344149459,
      "grad_norm": 0.0504217577684078,
      "learning_rate": 6.988578121156956e-06,
      "loss": 0.0534,
      "step": 220
    },
    {
      "epoch": 0.4424778761061947,
      "grad_norm": 0.04954740220141779,
      "learning_rate": 6.829751959596544e-06,
      "loss": 0.0538,
      "step": 225
    },
    {
      "epoch": 0.4523107177974435,
      "grad_norm": 0.04385337686115303,
      "learning_rate": 6.668764290186039e-06,
      "loss": 0.0561,
      "step": 230
    },
    {
      "epoch": 0.46214355948869223,
      "grad_norm": 0.05211980237057351,
      "learning_rate": 6.50580528958265e-06,
      "loss": 0.0547,
      "step": 235
    },
    {
      "epoch": 0.471976401179941,
      "grad_norm": 0.047168324719840615,
      "learning_rate": 6.341067463200678e-06,
      "loss": 0.053,
      "step": 240
    },
    {
      "epoch": 0.48180924287118976,
      "grad_norm": 0.04844095619488966,
      "learning_rate": 6.174745417802563e-06,
      "loss": 0.0525,
      "step": 245
    },
    {
      "epoch": 0.4916420845624385,
      "grad_norm": 0.05162902137580191,
      "learning_rate": 6.007035631607605e-06,
      "loss": 0.0521,
      "step": 250
    },
    {
      "epoch": 0.5014749262536873,
      "grad_norm": 0.044264598715667285,
      "learning_rate": 5.838136222189874e-06,
      "loss": 0.0517,
      "step": 255
    },
    {
      "epoch": 0.511307767944936,
      "grad_norm": 0.04516063975011773,
      "learning_rate": 5.668246712439579e-06,
      "loss": 0.0492,
      "step": 260
    },
    {
      "epoch": 0.5211406096361848,
      "grad_norm": 0.04569130289191897,
      "learning_rate": 5.4975677948642704e-06,
      "loss": 0.0489,
      "step": 265
    },
    {
      "epoch": 0.5309734513274337,
      "grad_norm": 0.04742268284655076,
      "learning_rate": 5.3263010945083994e-06,
      "loss": 0.0541,
      "step": 270
    },
    {
      "epoch": 0.5408062930186824,
      "grad_norm": 0.04736036558295303,
      "learning_rate": 5.1546489307712345e-06,
      "loss": 0.0499,
      "step": 275
    },
    {
      "epoch": 0.5506391347099312,
      "grad_norm": 0.05269001843059825,
      "learning_rate": 4.982814078404543e-06,
      "loss": 0.0525,
      "step": 280
    },
    {
      "epoch": 0.56047197640118,
      "grad_norm": 0.04513439132816848,
      "learning_rate": 4.8109995279723556e-06,
      "loss": 0.0532,
      "step": 285
    },
    {
      "epoch": 0.5703048180924287,
      "grad_norm": 0.046670647721283355,
      "learning_rate": 4.639408246055781e-06,
      "loss": 0.0495,
      "step": 290
    },
    {
      "epoch": 0.5801376597836775,
      "grad_norm": 0.05146253698361455,
      "learning_rate": 4.468242935486164e-06,
      "loss": 0.0523,
      "step": 295
    },
    {
      "epoch": 0.5899705014749262,
      "grad_norm": 0.04480174522261988,
      "learning_rate": 4.29770579588981e-06,
      "loss": 0.0477,
      "step": 300
    },
    {
      "epoch": 0.599803343166175,
      "grad_norm": 0.051094616814280076,
      "learning_rate": 4.127998284827148e-06,
      "loss": 0.0448,
      "step": 305
    },
    {
      "epoch": 0.6096361848574238,
      "grad_norm": 0.049760706749433746,
      "learning_rate": 3.9593208798085094e-06,
      "loss": 0.0491,
      "step": 310
    },
    {
      "epoch": 0.6194690265486725,
      "grad_norm": 0.05365560094468974,
      "learning_rate": 3.791872841467643e-06,
      "loss": 0.0492,
      "step": 315
    },
    {
      "epoch": 0.6293018682399213,
      "grad_norm": 0.0458330880453793,
      "learning_rate": 3.625851978172765e-06,
      "loss": 0.0485,
      "step": 320
    },
    {
      "epoch": 0.63913470993117,
      "grad_norm": 0.04942796466626996,
      "learning_rate": 3.4614544123531476e-06,
      "loss": 0.0458,
      "step": 325
    },
    {
      "epoch": 0.6489675516224189,
      "grad_norm": 0.048056634965656306,
      "learning_rate": 3.29887434881737e-06,
      "loss": 0.0471,
      "step": 330
    },
    {
      "epoch": 0.6588003933136677,
      "grad_norm": 0.048666333381827916,
      "learning_rate": 3.138303845336844e-06,
      "loss": 0.0461,
      "step": 335
    },
    {
      "epoch": 0.6686332350049164,
      "grad_norm": 0.048562572719718075,
      "learning_rate": 2.9799325857656856e-06,
      "loss": 0.0483,
      "step": 340
    },
    {
      "epoch": 0.6784660766961652,
      "grad_norm": 0.05367782611447185,
      "learning_rate": 2.8239476559649013e-06,
      "loss": 0.0421,
      "step": 345
    },
    {
      "epoch": 0.688298918387414,
      "grad_norm": 0.049074863626744465,
      "learning_rate": 2.6705333227956304e-06,
      "loss": 0.0478,
      "step": 350
    },
    {
      "epoch": 0.6981317600786627,
      "grad_norm": 0.0443538584337734,
      "learning_rate": 2.5198708164425046e-06,
      "loss": 0.0458,
      "step": 355
    },
    {
      "epoch": 0.7079646017699115,
      "grad_norm": 0.057247973053718454,
      "learning_rate": 2.372138116324254e-06,
      "loss": 0.043,
      "step": 360
    },
    {
      "epoch": 0.7177974434611603,
      "grad_norm": 0.04662349074304718,
      "learning_rate": 2.227509740844508e-06,
      "loss": 0.0425,
      "step": 365
    },
    {
      "epoch": 0.727630285152409,
      "grad_norm": 0.052705557545011036,
      "learning_rate": 2.086156541231109e-06,
      "loss": 0.0464,
      "step": 370
    },
    {
      "epoch": 0.7374631268436578,
      "grad_norm": 0.0488739451266495,
      "learning_rate": 1.948245499707523e-06,
      "loss": 0.0452,
      "step": 375
    },
    {
      "epoch": 0.7472959685349065,
      "grad_norm": 0.054383482928176255,
      "learning_rate": 1.8139395322347335e-06,
      "loss": 0.0449,
      "step": 380
    },
    {
      "epoch": 0.7571288102261554,
      "grad_norm": 0.048471644593832505,
      "learning_rate": 1.6833972960566868e-06,
      "loss": 0.0462,
      "step": 385
    },
    {
      "epoch": 0.7669616519174042,
      "grad_norm": 0.054548017673372924,
      "learning_rate": 1.5567730022765753e-06,
      "loss": 0.0445,
      "step": 390
    },
    {
      "epoch": 0.7767944936086529,
      "grad_norm": 0.04894253403708729,
      "learning_rate": 1.434216233685441e-06,
      "loss": 0.0416,
      "step": 395
    },
    {
      "epoch": 0.7866273352999017,
      "grad_norm": 0.053478489572820836,
      "learning_rate": 1.3158717680582128e-06,
      "loss": 0.0442,
      "step": 400
    },
    {
      "epoch": 0.7964601769911505,
      "grad_norm": 0.05243524821938133,
      "learning_rate": 1.201879407126012e-06,
      "loss": 0.0432,
      "step": 405
    },
    {
      "epoch": 0.8062930186823992,
      "grad_norm": 0.05003461970801258,
      "learning_rate": 1.0923738114266824e-06,
      "loss": 0.0426,
      "step": 410
    },
    {
      "epoch": 0.816125860373648,
      "grad_norm": 0.0533532507343424,
      "learning_rate": 9.874843412286994e-07,
      "loss": 0.0423,
      "step": 415
    },
    {
      "epoch": 0.8259587020648967,
      "grad_norm": 0.05042011232647438,
      "learning_rate": 8.87334903716332e-07,
      "loss": 0.0431,
      "step": 420
    },
    {
      "epoch": 0.8357915437561455,
      "grad_norm": 0.04680039021213863,
      "learning_rate": 7.920438066166097e-07,
      "loss": 0.0451,
      "step": 425
    },
    {
      "epoch": 0.8456243854473943,
      "grad_norm": 0.05398686051710015,
      "learning_rate": 7.017236184409859e-07,
      "loss": 0.043,
      "step": 430
    },
    {
      "epoch": 0.855457227138643,
      "grad_norm": 0.04981702076214587,
      "learning_rate": 6.164810355068179e-07,
      "loss": 0.0421,
      "step": 435
    },
    {
      "epoch": 0.8652900688298918,
      "grad_norm": 0.048895084051078515,
      "learning_rate": 5.364167558957267e-07,
      "loss": 0.0424,
      "step": 440
    },
    {
      "epoch": 0.8751229105211407,
      "grad_norm": 0.04967103561868963,
      "learning_rate": 4.6162536049775387e-07,
      "loss": 0.0407,
      "step": 445
    },
    {
      "epoch": 0.8849557522123894,
      "grad_norm": 0.05409813455002471,
      "learning_rate": 3.9219520128182087e-07,
      "loss": 0.0446,
      "step": 450
    },
    {
      "epoch": 0.8947885939036382,
      "grad_norm": 0.05188768440157575,
      "learning_rate": 3.2820829692449984e-07,
      "loss": 0.0414,
      "step": 455
    },
    {
      "epoch": 0.904621435594887,
      "grad_norm": 0.056986853395008884,
      "learning_rate": 2.697402359203638e-07,
      "loss": 0.0443,
      "step": 460
    },
    {
      "epoch": 0.9144542772861357,
      "grad_norm": 0.051222887880893075,
      "learning_rate": 2.1686008728840301e-07,
      "loss": 0.0388,
      "step": 465
    },
    {
      "epoch": 0.9242871189773845,
      "grad_norm": 0.05162686496359886,
      "learning_rate": 1.6963031897995863e-07,
      "loss": 0.0413,
      "step": 470
    },
    {
      "epoch": 0.9341199606686332,
      "grad_norm": 0.05174506617392384,
      "learning_rate": 1.28106724084594e-07,
      "loss": 0.0427,
      "step": 475
    },
    {
      "epoch": 0.943952802359882,
      "grad_norm": 0.05143864239478959,
      "learning_rate": 9.233835492104326e-08,
      "loss": 0.044,
      "step": 480
    },
    {
      "epoch": 0.9537856440511308,
      "grad_norm": 0.051746575729352574,
      "learning_rate": 6.236746509112824e-08,
      "loss": 0.0438,
      "step": 485
    },
    {
      "epoch": 0.9636184857423795,
      "grad_norm": 0.04824943263655459,
      "learning_rate": 3.8229459565070074e-08,
      "loss": 0.0396,
      "step": 490
    },
    {
      "epoch": 0.9734513274336283,
      "grad_norm": 0.05484892399635814,
      "learning_rate": 1.99528528571763e-08,
      "loss": 0.0408,
      "step": 495
    },
    {
      "epoch": 0.983284169124877,
      "grad_norm": 0.0494199239035283,
      "learning_rate": 7.559235341302872e-09,
      "loss": 0.0415,
      "step": 500
    },
    {
      "epoch": 0.9931170108161259,
      "grad_norm": 0.05265167499751187,
      "learning_rate": 1.0632477458888401e-09,
      "loss": 0.0415,
      "step": 505
    },
    {
      "epoch": 0.9990167158308751,
      "step": 508,
      "total_flos": 2.592513406535729e+18,
      "train_loss": 0.07262561121207523,
      "train_runtime": 7572.8833,
      "train_samples_per_second": 3.223,
      "train_steps_per_second": 0.067
    }
  ],
  "logging_steps": 5,
  "max_steps": 508,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.592513406535729e+18,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}